realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,1309 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ ResearchPipeline 2.0 - Research workflow based on dynamic topic queue
5
+ Coordinates three stages: Planning -> Researching -> Reporting
6
+ """
7
+
8
+ import asyncio
9
+ from datetime import datetime
10
+ import json
11
+ from pathlib import Path
12
+ import sys
13
+ from typing import Any, Callable
14
+
15
+
16
+ def _get_project_root() -> Path:
17
+ """
18
+ Get project root directory robustly by looking for marker files.
19
+ Works regardless of how the script is invoked.
20
+ """
21
+ # Start from current file's directory
22
+ current = Path(__file__).resolve().parent
23
+
24
+ # Walk up looking for project markers (pyproject.toml, requirements.txt, or src/ directory)
25
+ markers = ["pyproject.toml", "requirements.txt", ".git"]
26
+
27
+ for _ in range(10): # Limit to 10 levels up
28
+ for marker in markers:
29
+ if (current / marker).exists():
30
+ return current
31
+ parent = current.parent
32
+ if parent == current: # Reached filesystem root
33
+ break
34
+ current = parent
35
+
36
+ # Fallback: use relative path from this file
37
+ # This file is at: src/agents/research/research_pipeline.py
38
+ # So project root is: ../../../
39
+ return Path(__file__).resolve().parent.parent.parent.parent
40
+
41
+
42
# Resolve the project root once at import time.
PROJECT_ROOT = _get_project_root()

# Prepend the project root to sys.path so the absolute `src.*` imports
# below resolve regardless of how this module is invoked.
sys.path.insert(0, str(PROJECT_ROOT))
47
+
48
+ from src.agents.research.agents import (
49
+ DecomposeAgent,
50
+ ManagerAgent,
51
+ NoteAgent,
52
+ RephraseAgent,
53
+ ReportingAgent,
54
+ ResearchAgent,
55
+ )
56
+ from src.agents.research.data_structures import DynamicTopicQueue
57
+ from src.agents.research.utils.citation_manager import CitationManager
58
+ from src.logging import get_logger
59
+ from src.tools.code_executor import run_code
60
+ from src.tools.paper_search_tool import PaperSearchTool
61
+ from src.tools.query_item_tool import query_numbered_item
62
+ from src.tools.rag_tool import rag_search
63
+ from src.tools.web_search import web_search
64
+
65
+
66
+ class ResearchPipeline:
67
+ """DR-in-KG 2.0 Research workflow"""
68
+
69
    def __init__(
        self,
        config: dict[str, Any],
        api_key: str,
        base_url: str,
        api_version: str | None = None,
        research_id: str | None = None,
        kb_name: str | None = None,
        progress_callback: Callable | None = None,
    ) -> None:
        """
        Initialize the research workflow.

        Creates the cache/report directories, the dynamic topic queue, the
        logger, all agents, and the citation manager. Note the ordering
        constraint: the logger must exist before agents are constructed.

        Args:
            config: Configuration dictionary
            api_key: API key
            base_url: API endpoint
            api_version: API version (for Azure OpenAI)
            research_id: Research task ID (optional; auto-generated from a
                timestamp when omitted)
            kb_name: Knowledge base name (optional, if provided overrides
                config file setting)
            progress_callback: Progress callback function (optional),
                signature: callback(event: Dict[str, Any]). Its presence is
                also used elsewhere to detect "frontend mode".
        """
        self.config = config
        self.progress_callback = progress_callback

        # If kb_name is provided, override the config's rag.kb_name
        # (creating the "rag" section if it does not exist yet).
        if kb_name is not None:
            if "rag" not in self.config:
                self.config["rag"] = {}
            self.config["rag"]["kb_name"] = kb_name
        self.api_key = api_key
        self.base_url = base_url
        # Explicit argument wins; otherwise fall back to llm.api_version.
        self.api_version = api_version or config.get("llm", {}).get("api_version")
        self.input_topic: str | None = None
        self.optimized_topic: str | None = None

        # Generate a research ID when none was supplied.
        if research_id is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.research_id = f"research_{timestamp}"
        else:
            self.research_id = research_id

        # Per-research cache dir plus a shared reports dir.
        system_config = config.get("system", {})
        self.cache_dir = Path(system_config.get("output_base_dir", "./cache")) / self.research_id
        self.reports_dir = Path(system_config.get("reports_dir", "./reports"))

        # Create directories (idempotent).
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.reports_dir.mkdir(parents=True, exist_ok=True)
        # Progress/state files for each stage live under the cache dir.
        self.plan_progress_file = self.cache_dir / "planning_progress.json"
        self.report_progress_file = self.cache_dir / "reporting_progress.json"
        self.queue_progress_file = self.cache_dir / "queue_progress.json"
        # In-memory progress events bucketed by stage.
        self._stage_events: dict[str, list[dict[str, Any]]] = {
            "planning": [],
            "reporting": [],
        }

        # Initialize the dynamic topic queue, persisted to queue_progress_file.
        queue_cfg = config.get("queue", {})
        self.queue = DynamicTopicQueue(
            self.research_id,
            max_length=queue_cfg.get("max_length"),
            state_file=str(self.queue_progress_file),
        )

        # Initialize unified logging system (must be before _init_agents)
        self._init_logger()

        # Initialize Agents
        self.agents = {}
        self._init_agents()

        # Lazily-created tool instances.
        self._paper_tool: PaperSearchTool | None = None

        # Citation manager
        self.citation_manager = CitationManager(self.research_id, self.cache_dir)

        # Lock for thread-safe progress file writing in parallel mode
        import threading

        self._progress_file_lock = threading.Lock()
154
+ def _init_logger(self):
155
+ """Initialize unified logging system"""
156
+ # Get log_dir from config paths (user_log_dir from main.yaml)
157
+ log_dir = self.config.get("paths", {}).get("user_log_dir") or self.config.get(
158
+ "logging", {}
159
+ ).get("log_dir")
160
+
161
+ self.logger = get_logger(name="Research", log_dir=log_dir)
162
+ self.logger.success("Logger initialized")
163
+
164
+ def _init_agents(self):
165
+ """Initialize all Agents"""
166
+ if self.logger:
167
+ self.logger.info("Initializing Agents...")
168
+
169
+ self.agents = {
170
+ "rephrase": RephraseAgent(
171
+ self.config, self.api_key, self.base_url, api_version=self.api_version
172
+ ),
173
+ "decompose": DecomposeAgent(
174
+ self.config, self.api_key, self.base_url, api_version=self.api_version
175
+ ),
176
+ "manager": ManagerAgent(
177
+ self.config, self.api_key, self.base_url, api_version=self.api_version
178
+ ),
179
+ "research": ResearchAgent(
180
+ self.config, self.api_key, self.base_url, api_version=self.api_version
181
+ ),
182
+ "note": NoteAgent(
183
+ self.config, self.api_key, self.base_url, api_version=self.api_version
184
+ ),
185
+ "reporting": ReportingAgent(
186
+ self.config, self.api_key, self.base_url, api_version=self.api_version
187
+ ),
188
+ }
189
+
190
+ # Set Manager's queue
191
+ self.agents["manager"].set_queue(self.queue)
192
+
193
+ if self.logger:
194
+ self.logger.success(f"Initialized {len(self.agents)} Agents")
195
+
196
+ async def _call_tool_with_timeout(
197
+ self, coro, timeout: float = 60.0, tool_name: str = "tool"
198
+ ) -> Any:
199
+ """
200
+ Execute a coroutine with timeout support
201
+
202
+ Args:
203
+ coro: Coroutine to execute
204
+ timeout: Timeout in seconds (default 60s)
205
+ tool_name: Name of the tool for logging
206
+
207
+ Returns:
208
+ Result of the coroutine
209
+
210
+ Raises:
211
+ asyncio.TimeoutError: If timeout exceeded
212
+ """
213
+ try:
214
+ return await asyncio.wait_for(coro, timeout=timeout)
215
+ except asyncio.TimeoutError:
216
+ self.logger.warning(f"Tool {tool_name} timed out after {timeout}s")
217
+ raise
218
+
219
+ async def _call_tool_with_retry(
220
+ self,
221
+ tool_func,
222
+ *args,
223
+ max_retries: int = 2,
224
+ timeout: float = 60.0,
225
+ tool_name: str = "tool",
226
+ **kwargs,
227
+ ) -> Any:
228
+ """
229
+ Call a tool function with retry and timeout support
230
+
231
+ Args:
232
+ tool_func: Tool function to call
233
+ *args: Positional arguments for the function
234
+ max_retries: Maximum number of retries (default 2)
235
+ timeout: Timeout per attempt in seconds (default 60s)
236
+ tool_name: Name of the tool for logging
237
+ **kwargs: Keyword arguments for the function
238
+
239
+ Returns:
240
+ Result of the tool function
241
+ """
242
+ last_error = None
243
+
244
+ for attempt in range(max_retries + 1):
245
+ try:
246
+ if asyncio.iscoroutinefunction(tool_func):
247
+ result = await self._call_tool_with_timeout(
248
+ tool_func(*args, **kwargs), timeout=timeout, tool_name=tool_name
249
+ )
250
+ else:
251
+ # For sync functions, run in executor
252
+ import functools
253
+
254
+ loop = asyncio.get_event_loop()
255
+ result = await asyncio.wait_for(
256
+ loop.run_in_executor(None, functools.partial(tool_func, *args, **kwargs)),
257
+ timeout=timeout,
258
+ )
259
+ return result
260
+ except asyncio.TimeoutError as e:
261
+ last_error = e
262
+ if attempt < max_retries:
263
+ self.logger.warning(
264
+ f"Tool {tool_name} attempt {attempt + 1} timed out, retrying..."
265
+ )
266
+ await asyncio.sleep(1) # Brief pause before retry
267
+ except Exception as e:
268
+ last_error = e
269
+ if attempt < max_retries:
270
+ self.logger.warning(
271
+ f"Tool {tool_name} attempt {attempt + 1} failed: {e}, retrying..."
272
+ )
273
+ await asyncio.sleep(1)
274
+
275
+ # All retries exhausted
276
+ self.logger.error(f"Tool {tool_name} failed after {max_retries + 1} attempts: {last_error}")
277
+ raise last_error if last_error else RuntimeError(f"{tool_name} failed")
278
+
279
    async def _call_tool(self, tool_type: str, query: str) -> str:
        """
        Dispatch a tool call by type and return the raw string answer.

        Supported tool types: "rag_hybrid" / "rag_naive" / "rag",
        "web_search", "query_item", "paper_search", "run_code"; anything
        else falls back to RAG hybrid search. All results are serialized to
        a JSON string; failures are returned as a JSON error object rather
        than raised, so callers always receive a string.

        Args:
            tool_type: Tool identifier (case-insensitive; None treated as "")
            query: Query text, identifier, or code to pass to the tool

        Returns:
            JSON string with the tool result, or a
            {"status": "failed", ...} JSON object on error.
        """
        tool_type = (tool_type or "").lower()

        # Per-attempt timeout and retry budget come from the researching section.
        tool_config = self.config.get("researching", {})
        default_timeout = tool_config.get("tool_timeout", 60)
        max_retries = tool_config.get("tool_max_retries", 2)

        try:
            if tool_type in ("rag_hybrid", "rag_naive", "rag"):
                rag_cfg = self.config.get("rag", {})
                # NOTE(review): default kb here is "DE-all" but the other
                # branches below default to "ai_textbook" — confirm which is
                # intended.
                kb_name = rag_cfg.get("kb_name", "DE-all")
                default_mode = rag_cfg.get("default_mode", "hybrid")
                fallback_mode = rag_cfg.get("fallback_mode", "naive")
                # Explicit suffix pins the mode; bare "rag" uses the config default.
                if tool_type == "rag_hybrid":
                    mode = "hybrid"
                elif tool_type == "rag_naive":
                    mode = "naive"
                else:
                    mode = default_mode
                try:
                    res = await self._call_tool_with_retry(
                        rag_search,
                        query=query,
                        kb_name=kb_name,
                        mode=mode,
                        max_retries=max_retries,
                        timeout=default_timeout,
                        tool_name=f"rag_search({mode})",
                    )
                except Exception:
                    # Primary mode exhausted its retries: one last attempt
                    # with the configured fallback mode.
                    res = await self._call_tool_with_retry(
                        rag_search,
                        query=query,
                        kb_name=kb_name,
                        mode=fallback_mode,
                        max_retries=1,
                        timeout=default_timeout,
                        tool_name=f"rag_search({fallback_mode})",
                    )
                return json.dumps(res, ensure_ascii=False)

            if tool_type == "web_search":
                # Web search persists artifacts into this research's cache dir.
                res = await self._call_tool_with_retry(
                    web_search,
                    query=query,
                    output_dir=str(self.cache_dir),
                    max_retries=max_retries,
                    timeout=default_timeout,
                    tool_name="web_search",
                )
                return json.dumps(res, ensure_ascii=False)

            if tool_type == "query_item":
                # Here `query` is a numbered-item identifier, not free text.
                kb_name = self.config.get("rag", {}).get("kb_name", "ai_textbook")
                res = await self._call_tool_with_retry(
                    query_numbered_item,
                    identifier=query,
                    kb_name=kb_name,
                    max_retries=max_retries,
                    timeout=default_timeout,
                    tool_name="query_item",
                )
                return json.dumps(res, ensure_ascii=False)

            if tool_type == "paper_search":
                # Lazily create the paper search tool on first use.
                if self._paper_tool is None:
                    self._paper_tool = PaperSearchTool()
                years_limit = self.config.get("researching", {}).get("paper_search_years_limit", 3)
                papers = await self._call_tool_with_retry(
                    self._paper_tool.search_papers,
                    query=query,
                    max_results=3,
                    years_limit=years_limit,
                    max_retries=max_retries,
                    timeout=default_timeout,
                    tool_name="paper_search",
                )
                return json.dumps({"papers": papers}, ensure_ascii=False)

            if tool_type == "run_code":
                # Code execution has its own internal timeout (10s), wrapper timeout is 30s
                result = await self._call_tool_with_retry(
                    run_code,
                    language="python",
                    code=query,
                    max_retries=1,
                    timeout=30,  # Wrapper timeout
                    tool_name="run_code",
                )
                return json.dumps(result, ensure_ascii=False)

            # Default fallback to RAG hybrid
            kb_name = self.config.get("rag", {}).get("kb_name", "ai_textbook")
            res = await self._call_tool_with_retry(
                rag_search,
                query=query,
                kb_name=kb_name,
                mode="hybrid",
                max_retries=max_retries,
                timeout=default_timeout,
                tool_name="rag_search(hybrid)",
            )
            return json.dumps(res, ensure_ascii=False)
        except Exception as e:
            # Deliberate best-effort contract: never raise to the caller,
            # return a structured failure payload instead.
            return json.dumps(
                {"status": "failed", "error": str(e), "tool": tool_type, "query": query},
                ensure_ascii=False,
            )
390
+
391
    async def run(self, topic: str) -> dict[str, Any]:
        """
        Execute the complete research workflow.

        Runs the three phases in order (Planning -> Researching ->
        Reporting), then writes the report, queue snapshot, optional
        outline, metadata, and token-cost statistics to disk.

        Args:
            topic: Research topic

        Returns:
            Dict with research_id, topic, final_report_path, and metadata.

        Raises:
            Re-raises any exception from the phases after logging the
            traceback; exits the process on KeyboardInterrupt.
        """
        if self.logger:
            self.logger.section("DR-in-KG 2.0 - Deep Research System Based on Dynamic Topic Queue")
            self.logger.info(f"Research Topic: {topic}")
            self.logger.info(f"Research ID: {self.research_id}")
        self.input_topic = topic

        try:
            # ========== Phase 1: Planning (Planning and Initialization) ==========
            self.logger.info("\n" + "═" * 70)
            self.logger.info("▶ Phase 1: Planning - Planning and Initialization")
            self.logger.info("═" * 70)

            optimized_topic = await self._phase1_planning(topic)

            # ========== Phase 2: Researching (Dynamic Research Loop) ==========
            self.logger.info("\n" + "═" * 70)
            self.logger.info("▶ Phase 2: Researching - Dynamic Research Loop")
            self.logger.info("═" * 70)

            await self._phase2_researching()

            # ========== Phase 3: Reporting (Report Generation) ==========
            self.logger.info("\n" + "═" * 70)
            self.logger.info("▶ Phase 3: Reporting - Report Generation")
            self.logger.info("═" * 70)

            report_result = await self._phase3_reporting(optimized_topic)

            # ========== Save Results ==========
            self.logger.info("\n" + "═" * 70)
            self.logger.info("▶ Save Results")
            self.logger.info("═" * 70 + "\n")

            # Final markdown report goes to the shared reports directory.
            report_file = self.reports_dir / f"{self.research_id}.md"
            with open(report_file, "w", encoding="utf-8") as f:
                f.write(report_result["report"])
            self.logger.success(f"Final Report: {report_file}")

            # Save queue snapshot for inspection/resume.
            queue_file = self.cache_dir / "queue.json"
            self.queue.save_to_json(str(queue_file))
            self.logger.success(f"Queue Data: {queue_file}")

            # Save outline (if exists)
            if "outline" in report_result:
                outline_file = self.cache_dir / "outline.json"
                with open(outline_file, "w", encoding="utf-8") as f:
                    json.dump(report_result["outline"], f, ensure_ascii=False, indent=2)
                self.logger.success(f"Report Outline: {outline_file}")

            # Save run metadata alongside the report.
            metadata = {
                "research_id": self.research_id,
                "topic": topic,
                "optimized_topic": optimized_topic,
                "statistics": self.queue.get_statistics(),
                "report_word_count": report_result["word_count"],
                "completed_at": datetime.now().isoformat(),
            }

            metadata_file = self.reports_dir / f"{self.research_id}_metadata.json"
            with open(metadata_file, "w", encoding="utf-8") as f:
                json.dump(metadata, f, ensure_ascii=False, indent=2)
            self.logger.success(f"Metadata: {metadata_file}")

            # ===== Token Cost Statistics =====
            # Best-effort: cost tracking must never fail the whole run.
            try:
                from src.agents.research.utils.token_tracker import get_token_tracker

                tracker = get_token_tracker()
                cost_summary = tracker.format_summary()
                self.logger.info(cost_summary)
                cost_file = self.cache_dir / "token_cost_summary.json"
                tracker.save(str(cost_file))
                self.logger.success(f"Cost statistics saved: {cost_file}")
            except Exception as _e:
                self.logger.warning(f"Cost statistics failed: {_e}")

            # Final summary banner.
            self.logger.info("\n" + "=" * 70)
            self.logger.success("Research Completed!")
            self.logger.info("=" * 70)
            self.logger.info(f"Research ID: {self.research_id}")
            self.logger.info(f"Topic: {topic}")
            self.logger.info(f"Final Report: {report_file}")
            self.logger.info(f"Report Word Count: {report_result['word_count']}")
            self.logger.info(f"Topic Blocks: {len(self.queue.blocks)}")
            self.logger.info("=" * 70 + "\n")

            return {
                "research_id": self.research_id,
                "topic": topic,
                "final_report_path": str(report_file),
                "metadata": metadata,
            }

        except KeyboardInterrupt:
            # User abort is treated as a clean shutdown, not an error.
            self.logger.warning("\n\n⚠️ Research interrupted by user")
            sys.exit(0)
        except Exception as e:
            self.logger.error(f"\n\n✗ Research failed: {e!s}")
            import traceback

            self.logger.error(traceback.format_exc())
            raise
505
+
506
+ async def _phase1_planning(self, topic: str) -> str:
507
+ """
508
+ Phase 1: Planning and Initialization
509
+
510
+ Args:
511
+ topic: User input topic
512
+
513
+ Returns:
514
+ Optimized topic
515
+ """
516
+ self._log_progress("planning", "planning_started", user_topic=topic)
517
+
518
+ # Check if topic rephrasing is enabled
519
+ rephrase_config = self.config.get("planning", {}).get("rephrase", {})
520
+ rephrase_enabled = rephrase_config.get("enabled", True)
521
+
522
+ if rephrase_enabled:
523
+ self.logger.info("\n【Step 1】Topic Rephrasing...")
524
+
525
+ # Use RephraseAgent to optimize topic (supports user interaction)
526
+ max_iterations = rephrase_config.get("max_iterations", 3)
527
+
528
+ rephrase_result = None
529
+ iteration = 0
530
+ user_feedback = None # Initialize user feedback variable
531
+
532
+ # Check if running in frontend mode (has progress_callback)
533
+ # In frontend mode, skip interactive input loop - user controls via frontend UI
534
+ is_frontend_mode = self.progress_callback is not None
535
+
536
+ while iteration < max_iterations:
537
+ # Execute rephrasing
538
+ if iteration == 0:
539
+ rephrase_result = await self.agents["rephrase"].process(
540
+ topic, iteration=iteration
541
+ )
542
+ # Continue rephrasing based on user feedback
543
+ elif user_feedback:
544
+ rephrase_result = await self.agents["rephrase"].process(
545
+ user_feedback, iteration=iteration, previous_result=rephrase_result
546
+ )
547
+ else:
548
+ # If no feedback, use previous result
549
+ break
550
+
551
+ iteration += 1
552
+
553
+ # In frontend mode, only do one iteration and exit
554
+ # User will control further iterations via frontend UI (/optimize_topic API)
555
+ if is_frontend_mode:
556
+ self.logger.info(f"\n{'=' * 70}")
557
+ self.logger.info("📋 Rephrase Result (Frontend Mode):")
558
+ self.logger.info(f"{'=' * 70}")
559
+ self.logger.info(
560
+ f"Optimized Research Topic: {rephrase_result.get('topic', '')}"
561
+ )
562
+ self.logger.info(f"{'=' * 70}")
563
+ self.logger.success(
564
+ "Frontend mode: Using current result, proceeding to next stage"
565
+ )
566
+ break
567
+
568
+ # CLI mode: Ask user opinion (unless max iterations reached)
569
+ if iteration < max_iterations:
570
+ self.logger.info(f"\n{'=' * 70}")
571
+ self.logger.info("📋 Current Rephrasing Result:")
572
+ self.logger.info(f"{'=' * 70}")
573
+ self.logger.info(
574
+ f"Optimized Research Topic: {rephrase_result.get('topic', '')}"
575
+ )
576
+ self.logger.info(f"{'=' * 70}")
577
+ self.logger.info("\n💬 Are you satisfied with this rephrasing result?")
578
+ self.logger.info(
579
+ " - Enter 'satisfied', 'ok', etc. to indicate satisfaction, will proceed to next stage"
580
+ )
581
+ self.logger.info(
582
+ " - Enter specific modification suggestions, will continue optimizing based on your feedback"
583
+ )
584
+ self.logger.info(" - Press Enter directly to use current result")
585
+
586
+ user_input = input("\nYour choice: ").strip()
587
+
588
+ if not user_input:
589
+ self.logger.success("Using current result, proceeding to next stage")
590
+ break
591
+
592
+ # Determine user intent
593
+ satisfaction = await self.agents["rephrase"].check_user_satisfaction(
594
+ rephrase_result, user_input
595
+ )
596
+
597
+ if satisfaction.get("user_satisfied", False):
598
+ self.logger.success("User satisfied, proceeding to next stage")
599
+ break
600
+
601
+ if not satisfaction.get("should_continue", True):
602
+ self.logger.success("Proceeding to next stage")
603
+ break
604
+
605
+ # Continue iteration, use user input as feedback
606
+ user_feedback = user_input
607
+
608
+ # Ensure there is a result
609
+ if rephrase_result is None:
610
+ rephrase_result = {"topic": topic}
611
+
612
+ optimized_topic = rephrase_result.get("topic", topic)
613
+ self._log_progress(
614
+ "planning",
615
+ "rephrase_completed",
616
+ optimized_topic=optimized_topic,
617
+ iterations=iteration,
618
+ )
619
+ else:
620
+ self.logger.info("\n【Step 1】Topic Rephrasing (disabled, skipping)...")
621
+ optimized_topic = topic
622
+ self._log_progress(
623
+ "planning",
624
+ "rephrase_skipped",
625
+ optimized_topic=optimized_topic,
626
+ reason="rephrase feature disabled",
627
+ )
628
+
629
+ self.optimized_topic = optimized_topic
630
+
631
+ self.logger.info("\n【Step 2】Topic Decomposition...")
632
+
633
+ # Use DecomposeAgent to decompose topic
634
+ decompose_config = self.config.get("planning", {}).get("decompose", {})
635
+ mode = decompose_config.get("mode", "manual")
636
+
637
+ if mode == "auto":
638
+ # Auto mode: use auto_max_subtopics as limit
639
+ num_subtopics = decompose_config.get(
640
+ "auto_max_subtopics", decompose_config.get("initial_subtopics", 5)
641
+ )
642
+ self.logger.info(f"📌 Using Auto mode, max subtopics: {num_subtopics}")
643
+ else:
644
+ # Manual mode: use initial_subtopics
645
+ num_subtopics = decompose_config.get("initial_subtopics", 5)
646
+ self.logger.info(f"📌 Using Manual mode, expected subtopics: {num_subtopics}")
647
+
648
+ self._log_progress(
649
+ "planning", "decompose_started", requested_subtopics=num_subtopics, mode=mode
650
+ )
651
+
652
+ # Set citation manager to DecomposeAgent
653
+ self.agents["decompose"].set_citation_manager(self.citation_manager)
654
+
655
+ decompose_result = await self.agents["decompose"].process(
656
+ topic=optimized_topic, num_subtopics=num_subtopics, mode=mode
657
+ )
658
+ self._log_progress(
659
+ "planning",
660
+ "decompose_completed",
661
+ generated_subtopics=decompose_result.get("total_subtopics", 0),
662
+ rag_context_length=len(decompose_result.get("rag_context", "") or ""),
663
+ )
664
+
665
+ # Save Planning stage results (includes sub_queries / rag_context / sub_topics)
666
+ try:
667
+ step1_path = self.cache_dir / "step1_planning.json"
668
+ with open(step1_path, "w", encoding="utf-8") as f:
669
+ json.dump(
670
+ {
671
+ "main_topic": optimized_topic,
672
+ "sub_queries": decompose_result.get("sub_queries", []),
673
+ "rag_context": decompose_result.get("rag_context", ""),
674
+ "sub_topics": decompose_result.get("sub_topics", []),
675
+ "total_subtopics": decompose_result.get("total_subtopics", 0),
676
+ "timestamp": datetime.now().isoformat(),
677
+ },
678
+ f,
679
+ ensure_ascii=False,
680
+ indent=2,
681
+ )
682
+ self.logger.success(f"Planning stage data saved: {step1_path}")
683
+ except Exception as _e:
684
+ self.logger.warning(f"Failed to save Planning stage data: {_e}")
685
+
686
+ # Add subtopics to queue
687
+ self.logger.info("\n【Step 3】Initializing Queue...")
688
+ for sub_topic_data in decompose_result.get("sub_topics", []):
689
+ title = (sub_topic_data.get("title") or "").strip()
690
+ overview = sub_topic_data.get("overview", "")
691
+ if not title:
692
+ continue
693
+ try:
694
+ block = self.queue.add_block(sub_topic=title, overview=overview)
695
+ self._log_progress(
696
+ "planning",
697
+ "queue_seeded",
698
+ block_id=block.block_id,
699
+ sub_topic=block.sub_topic,
700
+ total_blocks=len(self.queue.blocks),
701
+ )
702
+ except RuntimeError as err:
703
+ self._log_progress(
704
+ "planning", "queue_capacity_reached", error=str(err), attempted_topic=title
705
+ )
706
+ self.logger.warning(
707
+ f"Queue reached capacity limit, stopping addition of initial topics: {err}"
708
+ )
709
+ break
710
+
711
+ stats = self.queue.get_statistics()
712
+ self._log_progress("planning", "planning_completed", total_blocks=stats["total_blocks"])
713
+ self.logger.success("\nPhase 1 Completed:")
714
+ self.logger.info(f" - Optimized Topic: {optimized_topic}")
715
+ self.logger.info(f" - Subtopic Count: {stats['total_blocks']}")
716
+ self.agents["manager"].set_primary_topic(optimized_topic)
717
+
718
+ return optimized_topic
719
+
720
+ async def _phase2_researching(self):
721
+ """
722
+ Phase 2: Dynamic Research Loop
723
+ Routes to series or parallel execution based on configuration
724
+ """
725
+ execution_mode = self.config.get("researching", {}).get("execution_mode", "series")
726
+
727
+ if execution_mode == "parallel":
728
+ await self._phase2_researching_parallel()
729
+ else:
730
+ await self._phase2_researching_series()
731
+
732
+ async def _phase2_researching_series(self):
733
+ """
734
+ Phase 2: Dynamic Research Loop (Series Mode - Original Implementation)
735
+ """
736
+ # Initialize researching stage event list
737
+ if "researching" not in self._stage_events:
738
+ self._stage_events["researching"] = []
739
+
740
+ manager = self.agents["manager"]
741
+ research = self.agents["research"]
742
+
743
+ total_blocks = len(self.queue.blocks)
744
+ completed_blocks = 0
745
+
746
+ self._log_researching_progress(
747
+ "researching_started", total_blocks=total_blocks, execution_mode="series"
748
+ )
749
+
750
+ while not manager.is_research_complete():
751
+ # Get next task to research
752
+ block = manager.get_next_task()
753
+ if not block:
754
+ break
755
+
756
+ self._log_researching_progress(
757
+ "block_started",
758
+ block_id=block.block_id,
759
+ sub_topic=block.sub_topic,
760
+ current_block=completed_blocks + 1,
761
+ total_blocks=total_blocks,
762
+ )
763
+
764
+ # Create iteration progress callback for this block
765
+ iteration_callback = self._create_iteration_progress_callback(
766
+ block_id=block.block_id,
767
+ sub_topic=block.sub_topic,
768
+ execution_mode="series",
769
+ current_block=completed_blocks + 1,
770
+ total_blocks=total_blocks,
771
+ )
772
+
773
+ # Execute research loop (unified handling by ResearchAgent.process)
774
+ result = await research.process(
775
+ topic_block=block,
776
+ call_tool_callback=self._call_tool,
777
+ note_agent=self.agents["note"],
778
+ citation_manager=self.citation_manager,
779
+ queue=self.queue,
780
+ manager_agent=manager,
781
+ config=self.config,
782
+ progress_callback=iteration_callback,
783
+ )
784
+
785
+ # Mark as completed
786
+ manager.complete_task(block.block_id)
787
+ completed_blocks += 1
788
+
789
+ # Update total_blocks in case new topics were added
790
+ total_blocks = len(self.queue.blocks)
791
+
792
+ self._log_researching_progress(
793
+ "block_completed",
794
+ block_id=block.block_id,
795
+ sub_topic=block.sub_topic,
796
+ iterations=result.get("iterations", 0),
797
+ tools_used=result.get("tools_used", []),
798
+ queries_used=result.get("queries_used", []),
799
+ current_block=completed_blocks,
800
+ total_blocks=total_blocks,
801
+ )
802
+
803
+ # Display statistics
804
+ manager.get_queue_status()
805
+
806
+ stats = self.queue.get_statistics()
807
+ self._log_researching_progress(
808
+ "researching_completed",
809
+ completed_blocks=stats["completed"],
810
+ total_tool_calls=stats["total_tool_calls"],
811
+ )
812
+
813
+ self.logger.success("\nPhase 2 Completed:")
814
+ self.logger.info(f" - Completed Topics: {stats['completed']}")
815
+ self.logger.info(f" - Total Tool Calls: {stats['total_tool_calls']}")
816
+
817
+ async def _phase2_researching_parallel(self):
818
+ """
819
+ Phase 2: Dynamic Research Loop (Parallel Mode)
820
+ Executes multiple topic blocks in parallel with concurrency limit
821
+ """
822
+ # Initialize researching stage event list
823
+ if "researching" not in self._stage_events:
824
+ self._stage_events["researching"] = []
825
+
826
+ manager = self.agents["manager"]
827
+ research = self.agents["research"]
828
+
829
+ # Get configuration
830
+ max_parallel = self.config.get("researching", {}).get("max_parallel_topics", 5)
831
+ semaphore = asyncio.Semaphore(max_parallel)
832
+
833
+ # Get all pending blocks at the start
834
+ from src.agents.research.data_structures import TopicStatus
835
+
836
+ pending_blocks = [b for b in self.queue.blocks if b.status == TopicStatus.PENDING]
837
+ total_blocks = len(self.queue.blocks)
838
+
839
+ self.logger.info(
840
+ f"\n🚀 Starting parallel research mode (max {max_parallel} concurrent topics)"
841
+ )
842
+ self._log_researching_progress(
843
+ "researching_started",
844
+ total_blocks=total_blocks,
845
+ execution_mode="parallel",
846
+ max_parallel=max_parallel,
847
+ initial_pending=len(pending_blocks),
848
+ )
849
+
850
+ # Track completed blocks
851
+ completed_count = {"value": 0} # Use dict to allow modification in nested function
852
+
853
+ # Create async wrappers for thread-safe operations in parallel mode
854
+ class AsyncCitationManagerWrapper:
855
+ """Wrapper to use async citation manager methods in parallel mode"""
856
+
857
+ def __init__(self, cm):
858
+ self._cm = cm
859
+
860
+ async def add_citation(self, citation_id, tool_type, tool_trace, raw_answer):
861
+ return await self._cm.add_citation_async(
862
+ citation_id, tool_type, tool_trace, raw_answer
863
+ )
864
+
865
+ def __getattr__(self, name):
866
+ # Forward other attributes to original citation_manager
867
+ return getattr(self._cm, name)
868
+
869
+ class AsyncManagerAgentWrapper:
870
+ """Wrapper to use async manager agent methods in parallel mode"""
871
+
872
+ def __init__(self, ma):
873
+ self._ma = ma
874
+
875
+ async def add_new_topic(self, sub_topic, overview):
876
+ return await self._ma.add_new_topic_async(sub_topic, overview)
877
+
878
+ def __getattr__(self, name):
879
+ # Forward other attributes to original manager_agent
880
+ return getattr(self._ma, name)
881
+
882
+ async_citation_manager = AsyncCitationManagerWrapper(self.citation_manager)
883
+ async_manager_agent = AsyncManagerAgentWrapper(manager)
884
+
885
+ # Track active tasks for parallel progress display
886
+ active_tasks: dict[str, dict[str, Any]] = {} # block_id -> task info
887
+ active_tasks_lock = asyncio.Lock()
888
+
889
+ async def update_active_task(block_id: str, info: dict[str, Any] | None):
890
+ """Update active task info (thread-safe)"""
891
+ async with active_tasks_lock:
892
+ if info is None:
893
+ active_tasks.pop(block_id, None)
894
+ else:
895
+ active_tasks[block_id] = info
896
+ # Send parallel status update
897
+ self._log_researching_progress(
898
+ "parallel_status_update",
899
+ active_tasks=list(active_tasks.values()),
900
+ active_count=len(active_tasks),
901
+ completed_count=completed_count["value"],
902
+ total_blocks=total_blocks,
903
+ )
904
+
905
+ async def research_single_block(block: Any) -> dict[str, Any] | None:
906
+ """
907
+ Research a single topic block with semaphore control
908
+
909
+ Args:
910
+ block: TopicBlock to research
911
+
912
+ Returns:
913
+ Research result or None if failed
914
+ """
915
+ async with semaphore:
916
+ try:
917
+ # Mark as researching (thread-safe)
918
+ async with manager._lock:
919
+ # Refresh block status from queue
920
+ current_block = self.queue.get_block_by_id(block.block_id)
921
+ if current_block and current_block.status == TopicStatus.PENDING:
922
+ self.queue.mark_researching(block.block_id)
923
+
924
+ # Add to active tasks
925
+ await update_active_task(
926
+ block.block_id,
927
+ {
928
+ "block_id": block.block_id,
929
+ "sub_topic": block.sub_topic,
930
+ "status": "starting",
931
+ "iteration": 0,
932
+ "current_tool": None,
933
+ "current_query": None,
934
+ },
935
+ )
936
+
937
+ self._log_researching_progress(
938
+ "block_started",
939
+ block_id=block.block_id,
940
+ sub_topic=block.sub_topic,
941
+ execution_mode="parallel",
942
+ active_count=len(active_tasks),
943
+ )
944
+
945
+ if self.logger:
946
+ self.logger.info(
947
+ f"\n[{block.block_id}] 🔍 Starting research: {block.sub_topic}"
948
+ )
949
+
950
+ # Get max_iterations from config for this closure
951
+ config_max_iterations = self.config.get("researching", {}).get(
952
+ "max_iterations", 5
953
+ )
954
+
955
+ # Create iteration callback for parallel mode
956
+ def parallel_iteration_callback(event_type: str, **data):
957
+ """Handle iteration progress in parallel mode"""
958
+ # Update active task info
959
+ task_info = {
960
+ "block_id": block.block_id,
961
+ "sub_topic": block.sub_topic,
962
+ "status": event_type,
963
+ "iteration": data.get("iteration", 0),
964
+ "max_iterations": data.get("max_iterations", config_max_iterations),
965
+ "current_tool": data.get("tool_type"),
966
+ "current_query": data.get("query"),
967
+ "tools_used": data.get("tools_used", []),
968
+ }
969
+ # Schedule async update
970
+ asyncio.create_task(update_active_task(block.block_id, task_info))
971
+
972
+ # Also log the detailed progress
973
+ self._log_researching_progress(
974
+ event_type,
975
+ block_id=block.block_id,
976
+ sub_topic=block.sub_topic,
977
+ execution_mode="parallel",
978
+ **data,
979
+ )
980
+
981
+ # Execute research loop with async wrappers
982
+ result = await research.process(
983
+ topic_block=block,
984
+ call_tool_callback=self._call_tool,
985
+ note_agent=self.agents["note"],
986
+ citation_manager=async_citation_manager,
987
+ queue=self.queue,
988
+ manager_agent=async_manager_agent,
989
+ config=self.config,
990
+ progress_callback=parallel_iteration_callback,
991
+ )
992
+
993
+ # Mark as completed (thread-safe)
994
+ await manager.complete_task_async(block.block_id)
995
+ completed_count["value"] += 1
996
+
997
+ # Remove from active tasks
998
+ await update_active_task(block.block_id, None)
999
+
1000
+ self._log_researching_progress(
1001
+ "block_completed",
1002
+ block_id=block.block_id,
1003
+ sub_topic=block.sub_topic,
1004
+ iterations=result.get("iterations", 0),
1005
+ tools_used=result.get("tools_used", []),
1006
+ queries_used=result.get("queries_used", []),
1007
+ current_block=completed_count["value"],
1008
+ total_blocks=total_blocks,
1009
+ execution_mode="parallel",
1010
+ )
1011
+
1012
+ if self.logger:
1013
+ self.logger.success(f"[{block.block_id}] ✓ Completed: {block.sub_topic}")
1014
+
1015
+ return result
1016
+
1017
+ except Exception as e:
1018
+ # Mark as failed (thread-safe)
1019
+ await manager.fail_task_async(block.block_id, str(e))
1020
+ completed_count["value"] += 1
1021
+
1022
+ # Remove from active tasks
1023
+ await update_active_task(block.block_id, None)
1024
+
1025
+ if self.logger:
1026
+ self.logger.error(f"[{block.block_id}] ✗ Failed: {block.sub_topic} - {e}")
1027
+
1028
+ self._log_researching_progress(
1029
+ "block_failed",
1030
+ block_id=block.block_id,
1031
+ sub_topic=block.sub_topic,
1032
+ error=str(e),
1033
+ execution_mode="parallel",
1034
+ )
1035
+ return None
1036
+
1037
+ # Execute all research tasks in parallel
1038
+ tasks = [research_single_block(block) for block in pending_blocks]
1039
+ results = await asyncio.gather(*tasks, return_exceptions=True)
1040
+
1041
+ # Handle any exceptions that weren't caught
1042
+ for i, result in enumerate(results):
1043
+ if isinstance(result, Exception):
1044
+ block = pending_blocks[i]
1045
+ await manager.fail_task_async(block.block_id, str(result))
1046
+ if self.logger:
1047
+ self.logger.error(f"[{block.block_id}] ✗ Exception: {result}")
1048
+
1049
+ # Wait for any dynamically added topics (if manager adds new topics during research)
1050
+ # Continue until all tasks are processed (completed or failed)
1051
+ max_wait_iterations = 100 # Prevent infinite loop
1052
+ wait_count = 0
1053
+
1054
+ while True:
1055
+ # Check if all blocks are processed (COMPLETED or FAILED, not PENDING or RESEARCHING)
1056
+ stats = self.queue.get_statistics()
1057
+ pending_count = stats.get("pending", 0)
1058
+ researching_count = stats.get("researching", 0)
1059
+
1060
+ # Exit if no pending or researching tasks
1061
+ if pending_count == 0 and researching_count == 0:
1062
+ break
1063
+
1064
+ # Get any newly added pending blocks
1065
+ new_pending = [b for b in self.queue.blocks if b.status == TopicStatus.PENDING]
1066
+ if not new_pending:
1067
+ # No pending blocks, but there might be researching ones
1068
+ # Wait a bit for them to complete
1069
+ wait_count += 1
1070
+ if wait_count > max_wait_iterations:
1071
+ self.logger.warning(
1072
+ "Max wait iterations reached, exiting parallel research loop"
1073
+ )
1074
+ break
1075
+ await asyncio.sleep(0.1)
1076
+ continue
1077
+
1078
+ # Reset wait count when we have new work
1079
+ wait_count = 0
1080
+
1081
+ # Research newly added blocks
1082
+ new_tasks = [research_single_block(block) for block in new_pending]
1083
+ new_results = await asyncio.gather(*new_tasks, return_exceptions=True)
1084
+
1085
+ for i, result in enumerate(new_results):
1086
+ if isinstance(result, Exception):
1087
+ block = new_pending[i]
1088
+ await manager.fail_task_async(block.block_id, str(result))
1089
+
1090
+ stats = self.queue.get_statistics()
1091
+ self._log_researching_progress(
1092
+ "researching_completed",
1093
+ completed_blocks=stats["completed"],
1094
+ total_tool_calls=stats["total_tool_calls"],
1095
+ execution_mode="parallel",
1096
+ )
1097
+
1098
+ self.logger.success("\nPhase 2 Completed (Parallel Mode):")
1099
+ self.logger.info(f" - Completed Topics: {stats['completed']}")
1100
+ self.logger.info(f" - Total Tool Calls: {stats['total_tool_calls']}")
1101
+ self.logger.info(f" - Failed Topics: {stats.get('failed', 0)}")
1102
+
1103
+ def _log_researching_progress(self, status: str, **payload: Any) -> None:
1104
+ """Record researching stage progress (thread-safe for parallel mode)"""
1105
+ event = {"status": status, "timestamp": datetime.now().isoformat()}
1106
+ event.update({k: v for k, v in payload.items() if v is not None})
1107
+
1108
+ # Use lock to prevent concurrent file writes in parallel mode
1109
+ with self._progress_file_lock:
1110
+ if "researching" not in self._stage_events:
1111
+ self._stage_events["researching"] = []
1112
+ self._stage_events["researching"].append(event)
1113
+
1114
+ # Save to file
1115
+ research_progress_file = self.cache_dir / "researching_progress.json"
1116
+ context = {
1117
+ "research_id": self.research_id,
1118
+ "stage": "researching",
1119
+ "input_topic": self.input_topic,
1120
+ "optimized_topic": self.optimized_topic,
1121
+ "events": self._stage_events["researching"],
1122
+ }
1123
+ with open(research_progress_file, "w", encoding="utf-8") as f:
1124
+ json.dump(context, f, ensure_ascii=False, indent=2)
1125
+
1126
+ # Send progress via callback
1127
+ if self.progress_callback:
1128
+ try:
1129
+ progress_event = {
1130
+ "type": "progress",
1131
+ "stage": "researching",
1132
+ "status": status,
1133
+ "research_id": self.research_id,
1134
+ **{k: v for k, v in payload.items() if v is not None},
1135
+ }
1136
+ self.progress_callback(progress_event)
1137
+ except Exception as e:
1138
+ self.logger.warning(f"Progress callback failed: {e}")
1139
+
1140
+ def _create_iteration_progress_callback(
1141
+ self,
1142
+ block_id: str,
1143
+ sub_topic: str,
1144
+ execution_mode: str,
1145
+ current_block: int = None,
1146
+ total_blocks: int = None,
1147
+ ) -> Callable:
1148
+ """
1149
+ Create a progress callback for research iterations
1150
+
1151
+ Args:
1152
+ block_id: Block ID for the current topic
1153
+ sub_topic: Current sub-topic being researched
1154
+ execution_mode: 'series' or 'parallel'
1155
+ current_block: Current block number (for series mode)
1156
+ total_blocks: Total number of blocks
1157
+
1158
+ Returns:
1159
+ Callback function for iteration progress
1160
+ """
1161
+
1162
+ def iteration_callback(event_type: str, **data: Any):
1163
+ """Callback for iteration progress events"""
1164
+ payload = {
1165
+ "block_id": block_id,
1166
+ "sub_topic": sub_topic,
1167
+ "execution_mode": execution_mode,
1168
+ }
1169
+ if current_block is not None:
1170
+ payload["current_block"] = current_block
1171
+ if total_blocks is not None:
1172
+ payload["total_blocks"] = total_blocks
1173
+ payload.update(data)
1174
+
1175
+ self._log_researching_progress(event_type, **payload)
1176
+
1177
+ return iteration_callback
1178
+
1179
+ async def _phase3_reporting(self, topic: str) -> dict[str, Any]:
1180
+ """
1181
+ Phase 3: Report Generation
1182
+
1183
+ Args:
1184
+ topic: Research topic
1185
+
1186
+ Returns:
1187
+ Report result
1188
+ """
1189
+ reporting = self.agents["reporting"]
1190
+
1191
+ # Set citation manager
1192
+ reporting.set_citation_manager(self.citation_manager)
1193
+
1194
+ # Generate report
1195
+ report_result = await reporting.process(
1196
+ self.queue, topic, progress_callback=self._report_progress_callback
1197
+ )
1198
+
1199
+ self.logger.success("\nPhase 3 Completed:")
1200
+ self.logger.info(f" - Report Word Count: {report_result['word_count']}")
1201
+ self.logger.info(f" - Sections: {report_result['sections']}")
1202
+ self.logger.info(f" - Citations: {report_result['citations']}")
1203
+
1204
+ return report_result
1205
+
1206
+ def _log_progress(self, stage: str, status: str, **payload: Any) -> None:
1207
+ """Record stage progress to JSON file and send progress via callback"""
1208
+ if stage not in self._stage_events:
1209
+ return
1210
+ event = {"status": status, "timestamp": datetime.now().isoformat()}
1211
+ event.update({k: v for k, v in payload.items() if v is not None})
1212
+ self._stage_events[stage].append(event)
1213
+ file_path = self.plan_progress_file if stage == "planning" else self.report_progress_file
1214
+ context = {
1215
+ "research_id": self.research_id,
1216
+ "stage": stage,
1217
+ "input_topic": self.input_topic,
1218
+ "optimized_topic": self.optimized_topic,
1219
+ "events": self._stage_events[stage],
1220
+ }
1221
+ with open(file_path, "w", encoding="utf-8") as f:
1222
+ json.dump(context, f, ensure_ascii=False, indent=2)
1223
+
1224
+ # Send progress via callback (if callback function is set)
1225
+ if self.progress_callback:
1226
+ try:
1227
+ progress_event = {
1228
+ "type": "progress",
1229
+ "stage": stage,
1230
+ "status": status,
1231
+ "research_id": self.research_id,
1232
+ **{k: v for k, v in payload.items() if v is not None},
1233
+ }
1234
+ self.progress_callback(progress_event)
1235
+ except Exception as e:
1236
+ self.logger.warning(f"Progress callback failed: {e}")
1237
+
1238
+ def _report_progress_callback(self, event: dict[str, Any]) -> None:
1239
+ """Reporting stage progress callback"""
1240
+ status = event.pop("status", "unknown")
1241
+ self._log_progress("reporting", status, **event)
1242
+
1243
+
1244
+ async def main():
1245
+ """Main function"""
1246
+ import argparse
1247
+
1248
+ from dotenv import load_dotenv
1249
+ import yaml
1250
+
1251
+ from src.services.llm import get_llm_config
1252
+
1253
+ # Load environment variables
1254
+ load_dotenv()
1255
+
1256
+ # Parse command line arguments
1257
+ parser = argparse.ArgumentParser(description="DR-in-KG 2.0 - Deep Research System")
1258
+ parser.add_argument("--topic", type=str, required=True, help="Research topic")
1259
+ parser.add_argument("--config", type=str, default="config.yaml", help="Configuration file")
1260
+ parser.add_argument(
1261
+ "--preset", type=str, choices=["quick", "medium", "deep", "auto"], help="Preset mode"
1262
+ )
1263
+
1264
+ args = parser.parse_args()
1265
+
1266
+ # Load configuration
1267
+ config_path = PROJECT_ROOT / args.config
1268
+ if not config_path.exists():
1269
+ logger = get_logger("Research")
1270
+ logger.error(f"Configuration file not found: {config_path}")
1271
+ sys.exit(1)
1272
+
1273
+ with open(config_path, encoding="utf-8") as f:
1274
+ config = yaml.safe_load(f)
1275
+
1276
+ # Apply preset
1277
+ if args.preset and "presets" in config and args.preset in config["presets"]:
1278
+ preset = config["presets"][args.preset]
1279
+ # Merge preset configuration
1280
+ for key, value in preset.items():
1281
+ if key in config and isinstance(value, dict):
1282
+ config[key].update(value)
1283
+
1284
+ # Get LLM configuration
1285
+ llm_config = get_llm_config()
1286
+
1287
+ # Create research pipeline
1288
+ pipeline = ResearchPipeline(
1289
+ config=config, api_key=llm_config.api_key, base_url=llm_config.base_url
1290
+ )
1291
+
1292
+ # Execute research
1293
+ result = await pipeline.run(args.topic)
1294
+
1295
+ logger = get_logger("Research")
1296
+ logger.success("\n🎉 Research completed!")
1297
+ logger.info(f"Report location: {result['final_report_path']}")
1298
+
1299
+
1300
+ if __name__ == "__main__":
1301
+ if sys.platform == "win32":
1302
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
1303
+
1304
+ asyncio.run(main())
1305
+
1306
+
1307
+ # Backward compatibility alias: old code can use ResearchPipeline, new code referencing ResearchPipeline2 won't error
1308
+ ResearchPipeline2 = ResearchPipeline
1309
+ __all__ = ["ResearchPipeline", "ResearchPipeline2"]