realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,414 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ InvestigateAgent - Investigator
4
+ Generates query actions and calls tools based on current memory and reflections.
5
+ """
6
+
7
+ from pathlib import Path
8
+ import sys
9
+ from typing import Any
10
+
11
+ # Add project root to path
12
+ project_root = Path(__file__).parent.parent.parent.parent
13
+ sys.path.insert(0, str(project_root))
14
+
15
+ import json
16
+
17
+ from src.agents.base_agent import BaseAgent
18
+ from src.tools import query_numbered_item, rag_search, web_search
19
+
20
+ from ..memory import CitationMemory, InvestigateMemory, KnowledgeItem
21
+ from ..utils.json_utils import extract_json_from_text
22
+
23
+
24
class InvestigateAgent(BaseAgent):
    """Investigator agent: asks the LLM for a tool plan, then runs the planned tools.

    Every successful tool call is registered in the citation memory and
    returned as a ``KnowledgeItem`` with an empty summary; ``process`` adds
    those items to the investigation memory.
    """

    def __init__(
        self,
        config: dict[str, Any],
        api_key: str,
        base_url: str,
        api_version: str | None = None,
        token_tracker=None,
    ):
        """Initialise the agent from the shared configuration mapping.

        Args:
            config: Configuration mapping. Keys read here are
                ``system.language`` (default ``"zh"``),
                ``tools.web_search.enabled`` (default ``True``) and
                ``solve.agents.investigate_agent.max_actions_per_round`` /
                ``max_iterations`` (defaults ``1`` and ``3``).
            api_key: Forwarded unchanged to ``BaseAgent``.
            base_url: Forwarded unchanged to ``BaseAgent``.
            api_version: Forwarded unchanged to ``BaseAgent``.
            token_tracker: Forwarded unchanged to ``BaseAgent``.
        """
        language = config.get("system", {}).get("language", "zh")
        super().__init__(
            module_name="solve",
            agent_name="investigate_agent",
            api_key=api_key,
            base_url=base_url,
            api_version=api_version,
            language=language,
            config=config,
            token_tracker=token_tracker,
        )
        # Read web_search enabled config from tools.web_search.enabled
        self.enable_web_search = config.get("tools", {}).get("web_search", {}).get("enabled", True)

        # Read agent-specific config from solve.agents.investigate_agent
        agent_config = config.get("solve", {}).get("agents", {}).get("investigate_agent", {})
        self.max_actions_per_round = agent_config.get("max_actions_per_round", 1)
        # Stored for callers; not read anywhere inside this class.
        self.max_iterations = agent_config.get("max_iterations", 3)

    async def process(
        self,
        question: str,
        memory: InvestigateMemory,
        citation_memory: CitationMemory,
        kb_name: str = "ai_textbook",
        output_dir: str | None = None,
        verbose: bool = True,
    ) -> dict[str, Any]:
        """Run one investigation round (supports multiple tools per round).

        Args:
            question: User question.
            memory: Investigation memory; new knowledge items are added to it.
            citation_memory: Citation memory used to register citations.
            kb_name: Knowledge base name passed to the tools.
            output_dir: Output directory; ``memory.save()`` is only called
                when this is set and at least one item was produced.
            verbose: Forwarded to ``call_llm``.

        Returns:
            A dict with the keys ``reasoning`` (str), ``should_stop`` (bool),
            ``knowledge_item_ids`` (list of cite ids) and ``actions`` (list of
            dicts describing every attempted tool call).

        Raises:
            ValueError: If ``citation_memory`` is ``None`` or a required
                prompt is missing.
        """
        if citation_memory is None:
            raise ValueError(
                "citation_memory cannot be None, InvestigateAgent needs it for citation registration"
            )

        # 1-2. Build context and prompts
        context = self._build_context(question, memory)
        system_prompt = self._build_system_prompt()
        user_prompt = self._build_user_prompt(context)

        # 3. Call LLM
        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            verbose=verbose,
            response_format={"type": "json_object"},
        )

        # 4. Parse output (JSON)
        parsed_result = extract_json_from_text(response)
        if not parsed_result or not isinstance(parsed_result, dict):
            self.logger.warning("Parse failed: LLM did not return valid JSON")
            return {
                "reasoning": "Parse failed: invalid JSON",
                "should_stop": True,
                "knowledge_item_ids": [],
                "actions": [],
            }

        reasoning = parsed_result.get("reasoning", "")
        tool_plans = self._normalize_plans(parsed_result.get("plan", []))

        # 5. Stop when there is nothing to do or any plan entry selects "none"
        if not tool_plans or any(plan.get("tool") == "none" for plan in tool_plans):
            return {
                "reasoning": reasoning,
                "should_stop": True,
                "knowledge_item_ids": [],
                "actions": [],
            }

        # 6. Execute tool calls, capped by max_actions_per_round
        knowledge_ids: list[str] = []
        executed_actions: list[dict[str, Any]] = []

        for plan in tool_plans[: self.max_actions_per_round]:
            tool_type = plan.get("tool")
            if not tool_type:
                continue
            # A "none" tool cannot reach this point: it triggers the early
            # return above.

            query = plan.get("query", "")
            identifier = plan.get("identifier")

            knowledge_item = await self._execute_single_action(
                tool_selection=tool_type,
                query=query,
                identifier=identifier,
                kb_name=kb_name,
                output_dir=output_dir,
                citation_memory=citation_memory,
            )

            # Failed calls are still reported, with cite_id set to None.
            executed_actions.append(
                {
                    "tool_type": tool_type,
                    "query": query,
                    "identifier": identifier,
                    "cite_id": knowledge_item.cite_id if knowledge_item else None,
                }
            )

            if knowledge_item:
                memory.add_knowledge(knowledge_item)
                knowledge_ids.append(knowledge_item.cite_id)

        if knowledge_ids and output_dir:
            memory.save()

        # 7. Return results
        return {
            "reasoning": reasoning,
            "should_stop": False,
            "knowledge_item_ids": knowledge_ids,
            "actions": executed_actions,
        }

    def _normalize_plans(self, raw_plans: Any) -> list[dict[str, Any]]:
        """Coerce the LLM's ``plan`` field into a list of dict entries.

        A single dict is wrapped in a list; any other non-list value yields an
        empty list. Non-dict entries inside a list are dropped, because the
        caller reads every entry with ``.get``.
        """
        if isinstance(raw_plans, dict):
            self.logger.warning("Parse warning: 'plan' field is a dict, wrapping in list")
            return [raw_plans]
        if not isinstance(raw_plans, list):
            self.logger.warning(
                "Parse warning: 'plan' field is not a list or dict, using empty list"
            )
            return []

        plans = [entry for entry in raw_plans if isinstance(entry, dict)]
        if len(plans) != len(raw_plans):
            self.logger.warning("Parse warning: ignoring non-dict entries in 'plan' field")
        return plans

    def _build_context(self, question: str, memory: InvestigateMemory) -> dict[str, Any]:
        """Build the template context (pass full content, no truncation).

        The returned keys are the placeholders handed to the user prompt
        template via ``str.format``.
        """
        chain = memory.knowledge_chain

        knowledge_chain_full = [
            {
                "cite_id": item.cite_id,
                "tool_type": item.tool_type,
                "query": item.query,
                "raw_result": item.raw_result,
                "summary": item.summary,
            }
            for item in chain
        ]

        remaining_questions_full = []
        if memory.reflections and memory.reflections.remaining_questions:
            remaining_questions_full = memory.reflections.remaining_questions.copy()

        if chain:
            # Fall back to the first 200 characters of the raw result when an
            # item has no summary yet; guard against a missing raw result.
            knowledge_chain_summary = "\n".join(
                f"- {item.cite_id} ({item.tool_type}): "
                f"{item.summary or (item.raw_result or '')[:200]}"
                for item in chain
            )
        else:
            knowledge_chain_summary = "(none)"

        if remaining_questions_full:
            reflections_summary = "\n".join(f"- {q}" for q in remaining_questions_full)
        else:
            reflections_summary = "(no remaining questions)"

        return {
            "question": question,
            "num_knowledge": len(chain),
            "knowledge_chain_full": knowledge_chain_full,
            "knowledge_chain_summary": knowledge_chain_summary,
            "reflections_summary": reflections_summary,
            "remaining_questions": remaining_questions_full,
            "action_queue": "(no action history)",
        }

    def _build_system_prompt(self) -> str:
        """Return the system prompt, with web_search removed when it is disabled.

        Raises:
            ValueError: If no ``system`` prompt is configured.
        """
        prompt = self.get_prompt("system") if self.has_prompts() else None
        if not prompt:
            raise ValueError(
                "InvestigateAgent missing system prompt. Configure in src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml"
            )

        if self.enable_web_search:
            return prompt

        disabled_prompt = self.get_prompt("web_search_disabled")
        description = self.get_prompt("web_search_description")
        if disabled_prompt and description:
            # Swap the web_search description for the "disabled" message.
            # The description must be non-empty: str.replace("", x) would
            # insert x between every character of the prompt.
            return prompt.replace(description, disabled_prompt)

        # No usable replacement pair: strip web_search mentions line by line.
        return self._strip_web_search_lines(prompt)

    @staticmethod
    def _strip_web_search_lines(prompt: str) -> str:
        """Remove web_search tool descriptions and tool-list entries from a prompt."""
        kept_lines = []
        for line in prompt.split("\n"):
            # Skip lines that describe web_search as an available tool
            if "`web_search`" in line and (
                "Use Sparingly" in line or "latest news" in line or "Web Search" in line
            ):
                continue
            # Also remove web_search from the tool list in the output format
            if "web_search" in line and (
                "rag_naive | rag_hybrid |" in line or 'tool":' in line
            ):
                line = (
                    line.replace(" | web_search", "")
                    .replace("| web_search", "")
                    .replace("web_search |", "")
                    .replace("web_search", "")
                )
            kept_lines.append(line)
        return "\n".join(kept_lines)

    def _build_user_prompt(self, context: dict[str, Any]) -> str:
        """Fill the ``user_template`` prompt with the given context.

        Raises:
            ValueError: If no ``user_template`` prompt is configured.
        """
        template = self.get_prompt("user_template") if self.has_prompts() else None
        if not template:
            raise ValueError(
                "InvestigateAgent missing user prompt template. Configure in prompts/en/analysis_loop/investigate_agent.yaml"
            )
        return template.format(**context)

    async def _execute_single_action(
        self,
        tool_selection: str,
        query: str,
        identifier: str | None,
        kb_name: str,
        output_dir: str | None,
        citation_memory: CitationMemory,
    ) -> KnowledgeItem | None:
        """Execute a single tool call and register its citation.

        Returns:
            A ``KnowledgeItem`` with an empty summary on success. ``None``
            when the tool is unknown, web_search is disabled, the
            ``query_item`` identifier is empty, or the call raised (the
            exception is logged, not propagated).
        """
        import time

        # perf_counter is monotonic, so the elapsed time is unaffected by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        tool_input = {"query": query, "identifier": identifier, "kb_name": kb_name}

        try:
            if tool_selection == "rag_naive":
                result = await self._call_rag_naive(query, kb_name, output_dir)
                raw_result = result.get("answer", "")

            elif tool_selection == "rag_hybrid":
                result = await self._call_rag_hybrid(query, kb_name, output_dir)
                raw_result = result.get("answer", "")

            elif tool_selection == "web_search":
                # The LLM may still request web_search when it is disabled
                if not self.enable_web_search:
                    self.logger.warning(
                        "Tool call rejected (web_search): web_search is disabled in config"
                    )
                    return None
                result = await self._call_web_search(query, output_dir)
                raw_result = json.dumps(result, ensure_ascii=False, indent=2)

            elif tool_selection == "query_item":
                # Fall back to the query text when no identifier was planned
                identifier_to_use = identifier or query
                if (
                    not identifier_to_use
                    or not isinstance(identifier_to_use, str)
                    or not identifier_to_use.strip()
                ):
                    self.logger.warning(
                        "Tool call failed (query_item): identifier is empty or invalid"
                    )
                    return None

                result = await self._call_query_item(identifier_to_use, kb_name)
                raw_result = result.get("content", result.get("answer", ""))

            else:
                self.logger.warning(f"Unknown tool type: {tool_selection}")
                return None

            elapsed_ms = (time.perf_counter() - start_time) * 1000

            # Create and register citation
            cite_id = citation_memory.add_citation(
                tool_type=tool_selection,
                query=query,
                raw_result=raw_result,
                stage="analysis",
                metadata={"identifier": identifier},
            )
            citation_memory.save()

            # Log tool call
            self.logger.log_tool_call(
                tool_name=tool_selection,
                tool_input=tool_input,
                tool_output=result,
                status="success",
                elapsed_ms=elapsed_ms,
                citation_id=cite_id,
            )

            return KnowledgeItem(
                cite_id=cite_id,
                tool_type=tool_selection,
                query=query,
                raw_result=raw_result,
                summary="",  # Generated by NoteAgent
            )

        except Exception as e:
            # Best effort: a failing tool must not abort the whole round.
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            error_msg = str(e)

            self.logger.log_tool_call(
                tool_name=tool_selection,
                tool_input=tool_input,
                tool_output=error_msg,
                status="failed",
                elapsed_ms=elapsed_ms,
                error=error_msg,
            )

            self.logger.warning(f"Tool call failed ({tool_selection}): {e}")
            return None

    async def _call_rag_naive(
        self, query: str, kb_name: str, output_dir: str | None
    ) -> dict[str, Any]:
        """Call ``rag_search`` in ``naive`` mode (``output_dir`` is unused)."""
        return await rag_search(query=query, kb_name=kb_name, mode="naive")

    async def _call_rag_hybrid(
        self, query: str, kb_name: str, output_dir: str | None
    ) -> dict[str, Any]:
        """Call ``rag_search`` in ``hybrid`` mode (``output_dir`` is unused)."""
        return await rag_search(query=query, kb_name=kb_name, mode="hybrid")

    async def _call_web_search(self, query: str, output_dir: str | None) -> dict[str, Any]:
        """Call ``web_search``; ``output_dir`` defaults to ``./cache`` when unset."""
        # NOTE(review): web_search is called without await here, so it
        # presumably runs synchronously inside this coroutine - confirm.
        return web_search(query=query, output_dir=output_dir or "./cache", verbose=False)

    async def _call_query_item(self, identifier: str, kb_name: str) -> dict[str, Any]:
        """Call ``query_numbered_item`` for the given identifier."""
        return query_numbered_item(identifier=identifier, kb_name=kb_name)
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ NoteAgent - Note taker
5
+ Based on new knowledge, generates or updates notes, annotates covered pain points and missing points
6
+ """
7
+
8
+ from pathlib import Path
9
+ import sys
10
+ from typing import Any
11
+
12
+ # Add project root to path
13
+ project_root = Path(__file__).parent.parent.parent.parent
14
+ sys.path.insert(0, str(project_root))
15
+
16
+ from src.agents.base_agent import BaseAgent
17
+
18
+ from ..memory import CitationMemory, InvestigateMemory, KnowledgeItem
19
+ from ..utils import ParseError, validate_note_output
20
+ from ..utils.json_utils import extract_json_from_text
21
+
22
+
23
class NoteAgent(BaseAgent):
    """Note taker agent: asks the LLM to summarise newly collected knowledge items.

    For every requested cite id, the summary returned by the LLM is written
    back to the investigation memory and, when a citation memory is supplied,
    to the matching citation.
    """

    def __init__(
        self,
        config: dict[str, Any],
        api_key: str,
        base_url: str,
        api_version: str | None = None,
        token_tracker=None,
    ):
        """Initialise the agent from the shared configuration mapping.

        Args:
            config: Configuration mapping; ``system.language`` is read here
                (default ``"zh"``).
            api_key: Forwarded unchanged to ``BaseAgent``.
            base_url: Forwarded unchanged to ``BaseAgent``.
            api_version: Forwarded unchanged to ``BaseAgent``.
            token_tracker: Forwarded unchanged to ``BaseAgent``.
        """
        language = config.get("system", {}).get("language", "zh")
        super().__init__(
            module_name="solve",
            agent_name="note_agent",
            api_key=api_key,
            base_url=base_url,
            api_version=api_version,
            language=language,
            config=config,
            token_tracker=token_tracker,
        )

    async def process(
        self,
        question: str,
        memory: InvestigateMemory,
        new_knowledge_ids: list[str],
        citation_memory: CitationMemory | None = None,
        output_dir: str | None = None,
        verbose: bool = True,
    ) -> dict[str, Any]:
        """Generate notes for one or more knowledge items in a single round.

        Args:
            question: User question.
            memory: Investigation memory holding the knowledge chain.
            new_knowledge_ids: cite ids of the new knowledge items; a single
                string is accepted for backward compatibility.
            citation_memory: Optional citation memory to update.
            output_dir: Output directory; ``memory.save()`` is only called
                when this is set and at least one item was processed.
            verbose: Enables debug logging and progress prints, and is
                forwarded to ``call_llm``.

        Returns:
            ``{"success": False, "reason": ...}`` when no ids were given,
            otherwise a dict with ``success`` (True only if nothing failed),
            ``processed_items``, ``details`` and ``failed``.

        Raises:
            ValueError: If a required prompt is missing.
        """
        if isinstance(new_knowledge_ids, str):  # Backward compatibility
            target_ids = [new_knowledge_ids]
        else:
            target_ids = new_knowledge_ids or []

        if not target_ids:
            return {"success": False, "reason": "new_knowledge_ids is empty"}

        system_prompt = self._build_system_prompt()
        processed_details = []
        failed_ids = []

        for cite_id in target_ids:
            knowledge_item = next((k for k in memory.knowledge_chain if k.cite_id == cite_id), None)
            if not knowledge_item:
                failed_ids.append({"cite_id": cite_id, "reason": "knowledge_item not found"})
                continue

            context = self._build_context(question, knowledge_item, memory)
            user_prompt = self._build_user_prompt(context)

            response = await self.call_llm(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                verbose=verbose,
                response_format={"type": "json_object"},
            )

            if verbose:
                self.logger.debug(f"cite_id={cite_id} LLM output stats:")
                self.logger.debug(f"  - Total length: {len(response)} chars")
                # Slicing already returns the whole string when it is shorter
                # than 100 characters.
                self.logger.debug(f"  - Last 100 chars: ...{response[-100:]}")

            try:
                parsed_result = extract_json_from_text(response)
                if not parsed_result or not isinstance(parsed_result, dict):
                    raise ParseError("LLM output is not a valid JSON object")
                # citations is optional, default to empty list
                if parsed_result.get("citations") is None:
                    parsed_result["citations"] = []
                elif not isinstance(parsed_result.get("citations"), list):
                    raise ParseError("citations field must be a list")
                validate_note_output(parsed_result)
                # Read inside the try so a missing summary is recorded as a
                # per-item failure instead of aborting the whole batch.
                summary = parsed_result["summary"]
                if verbose:
                    summary_len = len(parsed_result.get("summary", ""))
                    print(f"📝 [NoteAgent] cite_id={cite_id} summary length: {summary_len}")
            except (ParseError, Exception) as e:
                # Any parse or validation problem only fails this one item.
                failed_ids.append({"cite_id": cite_id, "reason": str(e)})
                continue

            citations = parsed_result.get("citations", [])
            memory.update_knowledge_summary(cite_id=cite_id, summary=summary)

            if citation_memory:
                # The entries come from the LLM: skip anything that is not a
                # dict and stringify sources so join() cannot raise.
                sources = ", ".join(
                    str(citation["source"])
                    for citation in citations
                    if isinstance(citation, dict) and citation.get("source")
                )
                metadata_block = {"extracted_sources": citations} if citations else None
                try:
                    citation_memory.update_citation(
                        cite_id=cite_id,
                        content=summary,
                        source=sources or None,
                        metadata=metadata_block,
                        stage="analysis",
                    )
                    citation_memory.save()
                except ValueError:
                    if verbose:
                        print(f"⚠️ cite_id not found in CitationMemory: {cite_id}")

            processed_details.append(
                {
                    "cite_id": cite_id,
                    "summary": summary,
                    "citations_count": len(citations),
                }
            )

        if processed_details and output_dir:
            memory.save()

        return {
            "success": len(failed_ids) == 0,
            "processed_items": len(processed_details),
            "details": processed_details,
            "failed": failed_ids,
        }

    def _build_context(
        self, question: str, knowledge_item: KnowledgeItem, memory: InvestigateMemory
    ) -> dict[str, Any]:
        """Build the template context for one knowledge item (``memory`` is unused)."""
        return {
            "question": question,
            "tool_type": knowledge_item.tool_type,
            "query": knowledge_item.query,
            "raw_result": knowledge_item.raw_result,  # Complete content
        }

    def _build_system_prompt(self) -> str:
        """Return the configured ``system`` prompt.

        Raises:
            ValueError: If no ``system`` prompt is configured.
        """
        prompt = self.get_prompt("system") if self.has_prompts() else None
        if not prompt:
            raise ValueError(
                "NoteAgent missing system prompt, please configure system in prompts/zh/analysis_loop/note_agent.yaml."
            )
        return prompt

    def _build_user_prompt(self, context: dict[str, Any]) -> str:
        """Fill the ``user_template`` prompt with the given context.

        Raises:
            ValueError: If no ``user_template`` prompt is configured.
        """
        template = self.get_prompt("user_template") if self.has_prompts() else None
        if not template:
            raise ValueError(
                "NoteAgent missing user prompt template, please configure user_template in prompts/zh/analysis_loop/note_agent.yaml."
            )
        return template.format(**context)