realtimex-deeptutor 0.5.0.post1 (realtimex_deeptutor-0.5.0.post1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
src/agents/question/coordinator.py (new file)
@@ -0,0 +1,718 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+AgentCoordinator - Orchestrates question generation workflow.
+
+Refactored version:
+- Uses specialized agents: RetrieveAgent, GenerateAgent, RelevanceAnalyzer
+- No iterative validation loops - single-pass generation + relevance analysis
+- All questions are accepted, classified as "high" or "partial" relevance
+"""
+
+from collections.abc import Callable
+from datetime import datetime
+import json
+from pathlib import Path
+import sys
+from typing import Any
+
+# Add project root for imports
+project_root = Path(__file__).parent.parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from src.logging import Logger, get_logger
+from src.services.config import load_config_with_main
+
+from .agents.generate_agent import GenerateAgent
+from .agents.relevance_analyzer import RelevanceAnalyzer
+from .agents.retrieve_agent import RetrieveAgent
+
+
+class AgentCoordinator:
+    """
+    Coordinate question generation workflow using specialized agents.
+
+    Workflow:
+    1. RetrieveAgent: Generate queries and retrieve knowledge
+    2. Plan: Generate question plan with focuses
+    3. GenerateAgent: Generate questions
+    4. RelevanceAnalyzer: Analyze relevance (no rejection, just classification)
+    """
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        api_version: str | None = None,
+        max_rounds: int = 10,  # Kept for backward compatibility, but not used for iteration
+        kb_name: str | None = None,
+        output_dir: str | None = None,
+        language: str = "en",
+    ):
+        """
+        Initialize the coordinator.
+
+        Args:
+            api_key: API key (optional, loaded from config if not provided)
+            base_url: API endpoint (optional)
+            api_version: API version for Azure (optional)
+            max_rounds: Deprecated, kept for backward compatibility
+            kb_name: Knowledge base name
+            output_dir: Output directory for results
+            language: Language for prompts ("en" or "zh")
+        """
+        self.kb_name = kb_name
+        self.output_dir = output_dir
+        self.language = language
+
+        # Store API credentials for creating agents
+        self._api_key = api_key
+        self._base_url = base_url
+        self._api_version = api_version
+
+        # Load configuration
+        self.config = load_config_with_main("question_config.yaml", project_root)
+
+        # Initialize logger
+        log_dir = self.config.get("paths", {}).get("user_log_dir") or self.config.get(
+            "logging", {}
+        ).get("log_dir")
+        self.logger: Logger = get_logger("QuestionCoordinator", log_dir=log_dir)
+
+        # Get config values
+        question_cfg = self.config.get("question", {})
+        self.rag_query_count = question_cfg.get("rag_query_count", 3)
+        self.max_parallel_questions = question_cfg.get("max_parallel_questions", 1)
+        self.rag_mode = question_cfg.get("rag_mode", "naive")
+
+        # Token tracking - will be updated from BaseAgent shared stats
+        self.token_stats = {
+            "model": "gpt-4o-mini",
+            "calls": 0,
+            "tokens": 0,
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "cost": 0.0,
+        }
+
+        # WebSocket callback for streaming updates
+        self._ws_callback: Callable | None = None
+
+    def _update_token_stats(self):
+        """Update token_stats from BaseAgent's shared LLMStats for the question module."""
+        from src.agents.base_agent import BaseAgent
+
+        try:
+            stats = BaseAgent.get_stats("question")
+            summary = stats.get_summary()
+
+            self.token_stats = {
+                "model": summary.get("model", "gpt-4o-mini"),
+                "calls": summary.get("calls", 0),
+                "tokens": summary.get("total_tokens", 0),
+                "input_tokens": summary.get("input_tokens", 0),
+                "output_tokens": summary.get("output_tokens", 0),
+                "cost": summary.get("cost", 0.0),
+            }
+        except Exception as e:
+            self.logger.debug(f"Failed to update token stats: {e}")
+
+    def set_ws_callback(self, callback: Callable):
+        """Set WebSocket callback for streaming updates to frontend."""
+        self._ws_callback = callback
+
+    async def _send_ws_update(self, update_type: str, data: dict[str, Any]):
+        """Send update via WebSocket callback if available."""
+        if self._ws_callback:
+            try:
+                await self._ws_callback({"type": update_type, **data})
+            except Exception as e:
+                self.logger.debug(f"Failed to send WS update: {e}")
+
+    def _create_retrieve_agent(self) -> RetrieveAgent:
+        """Create a RetrieveAgent instance."""
+        return RetrieveAgent(
+            kb_name=self.kb_name,
+            rag_mode=self.rag_mode,
+            language=self.language,
+            api_key=self._api_key,
+            base_url=self._base_url,
+            api_version=self._api_version,
+        )
+
+    def _create_generate_agent(self) -> GenerateAgent:
+        """Create a GenerateAgent instance."""
+        return GenerateAgent(
+            language=self.language,
+            api_key=self._api_key,
+            base_url=self._base_url,
+            api_version=self._api_version,
+        )
+
+    def _create_relevance_analyzer(self) -> RelevanceAnalyzer:
+        """Create a RelevanceAnalyzer instance."""
+        return RelevanceAnalyzer(
+            language=self.language,
+            api_key=self._api_key,
+            base_url=self._base_url,
+            api_version=self._api_version,
+        )
+
+    # =========================================================================
+    # Main Entry Points
+    # =========================================================================
+
+    async def generate_question(
+        self,
+        requirement: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        Generate a single question with relevance analysis.
+
+        This is used by Mimic mode and for single question generation.
+
+        Args:
+            requirement: Question requirement dict
+
+        Returns:
+            Dict with:
+            - success: bool
+            - question: Generated question dict
+            - analysis: Relevance analysis result
+            - rounds: Always 1 (no iteration)
+        """
+        self.logger.section("Single Question Generation")
+        self.logger.info(f"Knowledge point: {requirement.get('knowledge_point', 'N/A')}")
+
+        await self._send_ws_update(
+            "progress", {"stage": "generating", "progress": {"status": "initializing"}}
+        )
+
+        # Step 1: Retrieve knowledge
+        retrieve_agent = self._create_retrieve_agent()
+        retrieval_result = await retrieve_agent.process(
+            requirement=requirement,
+            num_queries=self.rag_query_count,
+        )
+
+        if not retrieval_result.get("has_content"):
+            self.logger.warning("No relevant knowledge found")
+            return {
+                "success": False,
+                "error": "knowledge_not_found",
+                "message": "Knowledge base does not contain relevant information.",
+            }
+
+        knowledge_context = retrieval_result["summary"]
+
+        # Step 2: Generate question
+        generate_agent = self._create_generate_agent()
+
+        # Check if this is mimic mode (has reference_question)
+        reference_question = requirement.get("reference_question")
+
+        gen_result = await generate_agent.process(
+            requirement=requirement,
+            knowledge_context=knowledge_context,
+            reference_question=reference_question,
+        )
+
+        if not gen_result.get("success"):
+            self.logger.error(f"Question generation failed: {gen_result.get('error')}")
+            return {
+                "success": False,
+                "error": gen_result.get("error", "Generation failed"),
+            }
+
+        question = gen_result["question"]
+
+        # Step 3: Analyze relevance
+        analyzer = self._create_relevance_analyzer()
+        analysis = await analyzer.process(
+            question=question,
+            knowledge_context=knowledge_context,
+        )
+
+        self.logger.success(f"Question generated with {analysis['relevance']} relevance")
+
+        # Build result (compatible with old format)
+        result = {
+            "success": True,
+            "question": question,
+            "validation": {
+                "decision": "approve",  # Always approve
+                "relevance": analysis["relevance"],
+                "kb_coverage": analysis["kb_coverage"],
+                "extension_points": analysis.get("extension_points", ""),
+            },
+            "rounds": 1,  # No iteration
+        }
+
+        # Save to disk if output_dir is set
+        if self.output_dir:
+            self._save_question_result(result, requirement)
+
+        # Update token stats from shared LLMStats
+        self._update_token_stats()
+
+        return result
+
+    async def generate_questions_custom(
+        self,
+        requirement: dict[str, Any],
+        num_questions: int,
+    ) -> dict[str, Any]:
+        """
+        Custom mode: Generate multiple questions from a requirement.
+
+        Flow:
+        1. Researching: Retrieve background knowledge
+        2. Planning: Generate question plan with focuses
+        3. Generating: Generate each question + relevance analysis
+
+        Args:
+            requirement: Base requirement dict (knowledge_point, difficulty, question_type)
+            num_questions: Number of questions to generate
+
+        Returns:
+            Summary dict with all results
+        """
+        if num_questions <= 0:
+            raise ValueError("num_questions must be greater than zero")
+
+        self.logger.section(f"Custom Mode Generation: {num_questions} question(s)")
+
+        # Create batch directory
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        batch_dir = Path(self.output_dir) / f"batch_{timestamp}" if self.output_dir else None
+        if batch_dir:
+            batch_dir.mkdir(parents=True, exist_ok=True)
+
+        # =====================================================================
+        # Stage 1: Researching
+        # =====================================================================
+        self.logger.stage("Stage 1: Researching")
+        await self._send_ws_update(
+            "progress",
+            {"stage": "researching", "progress": {"status": "retrieving"}, "total": num_questions},
+        )
+
+        retrieve_agent = self._create_retrieve_agent()
+        retrieval_result = await retrieve_agent.process(
+            requirement=requirement,
+            num_queries=self.rag_query_count,
+        )
+
+        if not retrieval_result.get("has_content"):
+            self.logger.warning("No relevant knowledge found")
+            return {
+                "success": False,
+                "error": "knowledge_not_found",
+                "message": "Knowledge base does not contain relevant information.",
+                "search_queries": retrieval_result.get("queries", []),
+            }
+
+        knowledge_context = retrieval_result["summary"]
+        queries = retrieval_result["queries"]
+
+        # Save knowledge.json
+        if batch_dir:
+            self._save_knowledge_json(batch_dir, retrieval_result)
+
+        await self._send_ws_update("knowledge_saved", {"queries": queries})
+
+        # =====================================================================
+        # Stage 2: Planning
+        # =====================================================================
+        self.logger.stage("Stage 2: Planning")
+        await self._send_ws_update(
+            "progress", {"stage": "planning", "progress": {"status": "creating_plan"}}
+        )
+
+        plan = await self._generate_question_plan(requirement, knowledge_context, num_questions)
+        focuses = plan.get("focuses", [])
+
+        # Save plan.json
+        if batch_dir:
+            self._save_plan_json(batch_dir, plan)
+
+        await self._send_ws_update("plan_ready", {"plan": plan, "focuses": focuses})
+
+        # =====================================================================
+        # Stage 3: Generating
+        # =====================================================================
+        self.logger.stage("Stage 3: Generating")
+        await self._send_ws_update(
+            "progress",
+            {"stage": "generating", "progress": {"current": 0, "total": num_questions}},
+        )
+
+        results = []
+        failures = []
+
+        generate_agent = self._create_generate_agent()
+        analyzer = self._create_relevance_analyzer()
+
+        for idx, focus in enumerate(focuses):
+            question_id = focus.get("id", f"q_{idx + 1}")
+            self.logger.info(f"Generating question {question_id}")
+
+            await self._send_ws_update(
+                "question_update",
+                {
+                    "question_id": question_id,
+                    "status": "generating",
+                    "focus": focus.get("focus", ""),
+                },
+            )
+
+            # Generate question
+            gen_result = await generate_agent.process(
+                requirement=requirement,
+                knowledge_context=knowledge_context,
+                focus=focus,
+            )
+
+            if not gen_result.get("success"):
+                self.logger.error(f"Failed to generate question {question_id}")
+                failures.append(
+                    {
+                        "question_id": question_id,
+                        "error": gen_result.get("error", "Unknown error"),
+                    }
+                )
+                await self._send_ws_update(
+                    "question_update", {"question_id": question_id, "status": "error"}
+                )
+                continue
+
+            question = gen_result["question"]
+
+            # Analyze relevance
+            await self._send_ws_update(
+                "question_update", {"question_id": question_id, "status": "analyzing"}
+            )
+
+            analysis = await analyzer.process(
+                question=question,
+                knowledge_context=knowledge_context,
+            )
+
+            # Build validation dict (compatible with frontend)
+            validation = {
+                "decision": "approve",
+                "relevance": analysis["relevance"],
+                "kb_coverage": analysis["kb_coverage"],
+                "extension_points": analysis.get("extension_points", ""),
+            }
+
+            # Save result
+            result = {
+                "question_id": question_id,
+                "focus": focus,
+                "question": question,
+                "analysis": analysis,
+                "validation": validation,  # For frontend compatibility
+            }
+
+            if batch_dir:
+                self._save_custom_question_result(batch_dir, result)
+
+            results.append(result)
+
+            await self._send_ws_update(
+                "question_update", {"question_id": question_id, "status": "done"}
+            )
+            await self._send_ws_update(
+                "result",
+                {
+                    "question_id": question_id,
+                    "question": question,
+                    "validation": validation,  # Frontend expects 'validation'
+                    "focus": focus,
+                    "index": idx,
+                },
+            )
+            await self._send_ws_update(
+                "progress",
+                {"stage": "generating", "progress": {"current": idx + 1, "total": num_questions}},
+            )
+
+        # =====================================================================
+        # Complete
+        # =====================================================================
+        summary = {
+            "success": len(results) == num_questions,
+            "requested": num_questions,
+            "completed": len(results),
+            "failed": len(failures),
+            "search_queries": queries,
+            "plan": plan,
+            "results": results,
+            "failures": failures,
+        }
+
+        if batch_dir:
+            summary_file = batch_dir / "summary.json"
+            with open(summary_file, "w", encoding="utf-8") as f:
+                json.dump(summary, f, indent=2, ensure_ascii=False)
+            summary["output_dir"] = str(batch_dir)
+
+        # Update token stats from shared LLMStats
+        self._update_token_stats()
+
+        await self._send_ws_update(
+            "progress",
+            {
+                "stage": "complete",
+                "completed": len(results),
+                "failed": len(failures),
+                "total": num_questions,
+            },
+        )
+
+        self.logger.section("Generation Summary")
+        self.logger.info(f"Requested: {num_questions}")
+        self.logger.info(f"Completed: {len(results)}")
+        self.logger.info(f"Failed: {len(failures)}")
+
+        return summary
+
+    # =========================================================================
+    # Helper Methods
+    # =========================================================================
+
+    async def _generate_question_plan(
+        self,
+        requirement: dict[str, Any],
+        knowledge_context: str,
+        num_questions: int,
+    ) -> dict[str, Any]:
+        """
+        Generate a question plan with distinct focuses.
+
+        Args:
+            requirement: Base requirement
+            knowledge_context: Retrieved knowledge summary
+            num_questions: Number of questions
+
+        Returns:
+            Plan dict with focuses array
+        """
+        from src.services.llm import complete as llm_complete
+        from src.services.llm.config import get_llm_config
+
+        llm_config = get_llm_config()
+
+        system_prompt = (
+            "You are an educational content planner. Create distinct question focuses "
+            "that test different aspects of the same topic.\n\n"
+            "CRITICAL: Return ONLY valid JSON. Do not wrap in markdown code blocks.\n"
+            'Output JSON with key "focuses" containing an array of objects, each with:\n'
+            '- "id": string like "q_1", "q_2"\n'
+            '- "focus": string describing what aspect to test\n'
+            f'- "type": "{requirement.get("question_type", "written")}"'
+        )
+
+        # Truncate knowledge context consistently (4000 chars across all agents)
+        truncated_knowledge = (
+            knowledge_context[:4000] if len(knowledge_context) > 4000 else knowledge_context
+        )
+        truncation_suffix = "...[truncated]" if len(knowledge_context) > 4000 else ""
+
+        user_prompt = (
+            f"Topic: {requirement.get('knowledge_point', '')}\n"
+            f"Difficulty: {requirement.get('difficulty', 'medium')}\n"
+            f"Question Type: {requirement.get('question_type', 'written')}\n"
+            f"Number: {num_questions}\n\n"
+            f"Knowledge:\n{truncated_knowledge}{truncation_suffix}\n\n"
+            f"Generate exactly {num_questions} distinct focuses in JSON."
+        )
+
+        try:
+            response = await llm_complete(
+                prompt=user_prompt,
+                system_prompt=system_prompt,
+                model=llm_config.model,
+                api_key=self._api_key or llm_config.api_key,
+                base_url=self._base_url or llm_config.base_url,
+                api_version=self._api_version,
+                response_format={"type": "json_object"},
+                temperature=0.3,
+            )
+
+            data = json.loads(response)
+            focuses = data.get("focuses", [])
+            if not isinstance(focuses, list):
+                focuses = []
+
+        except Exception as e:
+            self.logger.warning(f"Failed to generate plan: {e}")
+            focuses = []
+
+        # Fallback: create simple focuses
+        if len(focuses) < num_questions:
+            question_type = requirement.get("question_type", "written")
+            for i in range(len(focuses), num_questions):
+                focuses.append(
+                    {
+                        "id": f"q_{i + 1}",
+                        "focus": f"Aspect {i + 1} of {requirement.get('knowledge_point', 'topic')}",
+                        "type": question_type,
+                    }
+                )
+
+        return {
+            "knowledge_point": requirement.get("knowledge_point", ""),
+            "difficulty": requirement.get("difficulty", "medium"),
+            "question_type": requirement.get("question_type", "written"),
+            "num_questions": num_questions,
+            "focuses": focuses[:num_questions],
+        }
+
+    def _save_question_result(
+        self,
+        result: dict[str, Any],
+        requirement: dict[str, Any],
+    ) -> str | None:
+        """Save a single question result to disk."""
+        if not self.output_dir:
+            return None
+
+        try:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_path = Path(self.output_dir) / f"question_{timestamp}"
+            output_path.mkdir(parents=True, exist_ok=True)
+
+            # Save result.json
+            with open(output_path / "result.json", "w", encoding="utf-8") as f:
+                json.dump(result, f, indent=2, ensure_ascii=False)
+
+            # Save question.md
+            question = result.get("question", {})
+            validation = result.get("validation", {})
+
+            md_content = f"""# Generated Question
+
+**Knowledge point**: {requirement.get("knowledge_point", question.get("knowledge_point", "N/A"))}
+**Difficulty**: {requirement.get("difficulty", "N/A")}
+**Type**: {question.get("question_type", "N/A")}
+**Relevance**: {validation.get("relevance", "N/A")}
+
+---
+
+## Question
+{question.get("question", "")}
+
+"""
+            if question.get("options"):
+                md_content += "## Options\n"
+                for key, value in question.get("options", {}).items():
+                    md_content += f"- **{key}**: {value}\n"
+                md_content += "\n"
+
+            md_content += f"""
+## Answer
+{question.get("correct_answer", "")}
+
+## Explanation
+{question.get("explanation", "")}
+
+---
+
+## Relevance Analysis
+
+**KB Coverage**: {validation.get("kb_coverage", "")}
+"""
+            if validation.get("extension_points"):
+                md_content += f"\n**Extension Points**: {validation.get('extension_points', '')}"
+
+            with open(output_path / "question.md", "w", encoding="utf-8") as f:
+                f.write(md_content)
+
+            self.logger.info(f"Result saved to: {output_path}")
+            return str(output_path)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to save result: {e}")
+            return None
+
+    def _save_knowledge_json(
+        self,
+        batch_dir: Path,
+        retrieval_result: dict[str, Any],
+    ):
+        """Save knowledge.json for a batch."""
+        knowledge_file = batch_dir / "knowledge.json"
+        with open(knowledge_file, "w", encoding="utf-8") as f:
+            json.dump(
+                {
+                    "queries": retrieval_result.get("queries", []),
+                    "retrievals": retrieval_result.get("retrievals", []),
+                },
+                f,
+                indent=2,
+                ensure_ascii=False,
+            )
+
+    def _save_plan_json(self, batch_dir: Path, plan: dict[str, Any]):
+        """Save plan.json for a batch."""
+        plan_file = batch_dir / "plan.json"
+        with open(plan_file, "w", encoding="utf-8") as f:
+            json.dump(plan, f, indent=2, ensure_ascii=False)
+
+    def _save_custom_question_result(
+        self,
+        batch_dir: Path,
+        result: dict[str, Any],
+    ):
+        """Save a single question result in custom mode."""
+        question_id = result.get("question_id", "q_unknown")
+        question_dir = batch_dir / question_id
+        question_dir.mkdir(parents=True, exist_ok=True)
+
+        # Save result.json
+        with open(question_dir / "result.json", "w", encoding="utf-8") as f:
+            json.dump(result, f, indent=2, ensure_ascii=False)
+
+        # Save question.md
+        question = result.get("question", {})
+        analysis = result.get("analysis", {})
+        focus = result.get("focus", {})
+
+        md_content = f"""# Generated Question
+
+**Focus**: {focus.get("focus", "N/A")}
+**Type**: {question.get("question_type", "N/A")}
+**Relevance**: {analysis.get("relevance", "N/A")}
+
+---
+
+## Question
+{question.get("question", "")}
+
+"""
+        if question.get("options"):
+            md_content += "## Options\n"
+            for key, value in question.get("options", {}).items():
+                md_content += f"- **{key}**: {value}\n"
+            md_content += "\n"
+
+        md_content += f"""
+## Answer
+{question.get("correct_answer", "")}
+
+## Explanation
+{question.get("explanation", "")}
+
+---
+
+## Relevance Analysis
+
+**KB Coverage**: {analysis.get("kb_coverage", "")}
+"""
+        if analysis.get("extension_points"):
+            md_content += f"\n**Extension Points**: {analysis.get('extension_points', '')}"
+
+        with open(question_dir / "question.md", "w", encoding="utf-8") as f:
+            f.write(md_content)
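
For orientation, the sketch below shows one way the AgentCoordinator added above might be driven. It is editorial commentary, not part of the package contents: it assumes only the signatures visible in the hunk (the constructor's kb_name/output_dir/language keywords, set_ws_callback, and the async generate_questions_custom), the requirement keys the code reads (knowledge_point, difficulty, question_type), and an import path that mirrors the wheel's file layout, which may differ from the package's intended public API. The kb_name, output_dir, and requirement values are placeholders.

import asyncio

# Illustrative import path only: it follows the module layout shown in the file list above.
from src.agents.question.coordinator import AgentCoordinator


async def main() -> None:
    # Placeholder values; the coordinator loads the rest of its settings from question_config.yaml.
    coordinator = AgentCoordinator(kb_name="demo_kb", output_dir="./question_output", language="en")

    # Optional: the coordinator awaits this callback with dicts like {"type": "progress", ...}.
    async def on_update(message: dict) -> None:
        print(message.get("type"), message)

    coordinator.set_ws_callback(on_update)

    # Keys match what the coordinator and its plan/generate agents read from the requirement.
    requirement = {
        "knowledge_point": "Bayes' theorem",
        "difficulty": "medium",
        "question_type": "written",
    }
    summary = await coordinator.generate_questions_custom(requirement, num_questions=3)
    print(f"Generated {summary.get('completed', 0)} of {summary.get('requested', 0)} question(s)")


if __name__ == "__main__":
    asyncio.run(main())

The same coordinator exposes generate_question(requirement) for single-question and mimic-mode use; per the code above, its return value carries the question plus a validation block with relevance and kb_coverage fields.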