realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,537 @@
1
+ import asyncio
2
+ import base64
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ import re
6
+ import sys
7
+ import traceback
8
+
9
+ from fastapi import APIRouter, WebSocket, WebSocketDisconnect
10
+
11
+ from src.agents.question import AgentCoordinator
12
+ from src.api.utils.history import ActivityType, history_manager
13
+ from src.api.utils.log_interceptor import LogInterceptor
14
+ from src.api.utils.task_id_manager import TaskIDManager
15
+ from src.tools.question import mimic_exam_questions
16
+ from src.utils.document_validator import DocumentValidator
17
+ from src.utils.error_utils import format_exception_message
18
+
19
# Add project root for imports: the directory four levels above this file is
# placed at the front of sys.path so it takes precedence during module lookup.
project_root = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(project_root))
22
+
23
+ from src.logging import get_logger
24
+ from src.services.config import load_config_with_main
25
+ from src.services.llm.config import get_llm_config
26
+
27
# Module logger, configured from question_config.yaml (loaded together with the
# main config). The log directory comes from paths.user_log_dir when that is
# set to a truthy value, otherwise from logging.log_dir.
project_root = Path(__file__).parent.parent.parent.parent
config = load_config_with_main("question_config.yaml", project_root)
log_dir = config.get("paths", {}).get("user_log_dir")
if not log_dir:
    log_dir = config.get("logging", {}).get("log_dir")
logger = get_logger("QuestionAPI", log_dir=log_dir)

# Router that the WebSocket endpoints in this module register on.
router = APIRouter()

# Mimic mode writes one batch directory per session under
# <project root>/data/user/question/mimic_papers.
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
MIMIC_OUTPUT_DIR = PROJECT_ROOT.joinpath("data", "user", "question", "mimic_papers")
38
+
39
+
40
@router.websocket("/mimic")
async def websocket_mimic_generate(websocket: WebSocket):
    """
    WebSocket endpoint for mimic exam paper question generation.

    Supports two modes:
    1. Upload PDF directly via WebSocket (base64 encoded)
    2. Use a pre-parsed paper directory path

    Message format for PDF upload:
    {
        "mode": "upload",
        "pdf_data": "base64_encoded_pdf_content",
        "pdf_name": "exam.pdf",
        "kb_name": "knowledge_base_name",
        "max_questions": 5  // optional
    }

    Message format for pre-parsed:
    {
        "mode": "parsed",
        "paper_path": "directory_name",
        "kb_name": "knowledge_base_name",
        "max_questions": 5  // optional
    }

    "mode" defaults to "parsed" and "kb_name" to "ai_textbook" when omitted.

    Messages sent to the client are JSON objects whose "type" is "status",
    "log", "error" or "complete", plus whatever event types the workflow
    reports through its progress callback.

    NOTE: while the workflow runs, sys.stdout is replaced process-wide by an
    interceptor that mirrors printed text to the client; it is restored before
    the handler returns.
    """
    await websocket.accept()

    pusher_task = None
    original_stdout = sys.stdout
    # Created before the try block so the cleanup in ``finally`` can always
    # drain it, even when the very first receive fails.
    log_queue = asyncio.Queue()

    try:
        # 1. Wait for config
        data = await websocket.receive_json()
        mode = data.get("mode", "parsed")  # "upload" or "parsed"
        kb_name = data.get("kb_name", "ai_textbook")
        max_questions = data.get("max_questions")

        logger.info(f"Starting mimic generation (mode: {mode}, kb: {kb_name})")

        # 2. Background task forwarding queued log entries to the client
        async def log_pusher():
            while True:
                entry = await log_queue.get()
                try:
                    await websocket.send_json(entry)
                except Exception:
                    # The socket is unusable; stop forwarding.
                    break
                log_queue.task_done()

        pusher_task = asyncio.create_task(log_pusher())

        # 3. Stdout interceptor for capturing prints
        # ANSI escape sequence pattern for stripping color codes
        ANSI_ESCAPE_PATTERN = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]")

        class StdoutInterceptor:
            """File-like stdout replacement that tees writes into the log queue."""

            def __init__(self, queue, original):
                self.queue = queue
                self.original_stdout = original
                self._closed = False

            def write(self, message):
                if self._closed:
                    return
                # Write to terminal first (with ANSI codes for color)
                try:
                    self.original_stdout.write(message)
                except Exception:
                    pass
                # Strip ANSI escape codes before sending to frontend
                clean_message = ANSI_ESCAPE_PATTERN.sub("", message).strip()
                # Then send to frontend (non-blocking); whitespace-only writes
                # are not forwarded.
                if clean_message:
                    try:
                        self.queue.put_nowait(
                            {
                                "type": "log",
                                "content": clean_message,
                                "timestamp": asyncio.get_event_loop().time(),
                            }
                        )
                    except (asyncio.QueueFull, RuntimeError):
                        pass

            def flush(self):
                if not self._closed:
                    try:
                        self.original_stdout.flush()
                    except Exception:
                        pass

            def close(self):
                """Mark interceptor as closed to prevent further writes."""
                self._closed = True

        interceptor = StdoutInterceptor(log_queue, original_stdout)
        sys.stdout = interceptor

        try:
            await websocket.send_json(
                {"type": "status", "stage": "init", "content": "Initializing..."}
            )

            pdf_path = None
            paper_dir = None

            # Handle PDF upload mode
            if mode == "upload":
                pdf_data = data.get("pdf_data")
                pdf_name = data.get("pdf_name", "exam.pdf")

                if not pdf_data:
                    await websocket.send_json(
                        {"type": "error", "content": "PDF data is required for upload mode"}
                    )
                    return

                # Decode PDF data first to check size
                try:
                    pdf_bytes = base64.b64decode(pdf_data)
                except Exception as e:
                    await websocket.send_json(
                        {"type": "error", "content": f"Invalid base64 PDF data: {e}"}
                    )
                    return

                # Pre-validate filename and file size before writing
                try:
                    safe_name = DocumentValidator.validate_upload_safety(
                        pdf_name, len(pdf_bytes), {".pdf"}
                    )
                except ValueError as e:
                    await websocket.send_json({"type": "error", "content": str(e)})
                    return

                # Create batch directory for this mimic session
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                pdf_stem = Path(safe_name).stem
                batch_dir = MIMIC_OUTPUT_DIR / f"mimic_{timestamp}_{pdf_stem}"
                batch_dir.mkdir(parents=True, exist_ok=True)

                # Save uploaded PDF in batch directory
                pdf_path = batch_dir / safe_name

                await websocket.send_json(
                    {"type": "status", "stage": "upload", "content": f"Saving PDF: {safe_name}"}
                )

                # Write the validated PDF bytes
                with open(pdf_path, "wb") as f:
                    f.write(pdf_bytes)

                # Additional validation (file readability, etc.)
                try:
                    DocumentValidator.validate_file(pdf_path)
                except (ValueError, FileNotFoundError, PermissionError) as e:
                    # Clean up invalid or inaccessible file
                    pdf_path.unlink(missing_ok=True)
                    await websocket.send_json({"type": "error", "content": str(e)})
                    return

                await websocket.send_json(
                    {
                        "type": "status",
                        "stage": "parsing",
                        "content": "Parsing PDF exam paper (MinerU)...",
                    }
                )
                logger.info(f"Saved and validated uploaded PDF to: {pdf_path}")

                # Pass batch_dir as output directory
                pdf_path = str(pdf_path)
                output_dir = str(batch_dir)

            elif mode == "parsed":
                paper_path = data.get("paper_path")
                if not paper_path:
                    await websocket.send_json(
                        {"type": "error", "content": "paper_path is required for parsed mode"}
                    )
                    return
                paper_dir = paper_path

                # Create batch directory for parsed mode too; only the final
                # path component of paper_path is used in the directory name.
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                batch_dir = MIMIC_OUTPUT_DIR / f"mimic_{timestamp}_{Path(paper_path).name}"
                batch_dir.mkdir(parents=True, exist_ok=True)
                output_dir = str(batch_dir)

            else:
                await websocket.send_json({"type": "error", "content": f"Unknown mode: {mode}"})
                return

            # Create WebSocket callback for real-time progress updates
            async def ws_callback(event_type: str, data: dict):
                """Send progress updates to the frontend via WebSocket."""
                try:
                    message = {"type": event_type, **data}
                    await websocket.send_json(message)
                except Exception as e:
                    logger.debug(f"WebSocket send failed: {e}")

            # Run the complete mimic workflow with callback
            await websocket.send_json(
                {
                    "type": "status",
                    "stage": "processing",
                    "content": "Executing question generation workflow...",
                }
            )

            result = await mimic_exam_questions(
                pdf_path=pdf_path,
                paper_dir=paper_dir,
                kb_name=kb_name,
                output_dir=output_dir,
                max_questions=max_questions,
                ws_callback=ws_callback,
            )

            if result.get("success"):
                # Results are already sent via ws_callback during generation
                # Just send the final complete signal
                generated = result.get("generated_questions", [])
                failed = result.get("failed_questions", [])

                logger.success(
                    f"Mimic generation complete: {len(generated)} succeeded, {len(failed)} failed"
                )

                try:
                    await websocket.send_json({"type": "complete"})
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed before complete signal could be sent")
            else:
                error_msg = result.get("error", "Unknown error")
                try:
                    await websocket.send_json({"type": "error", "content": error_msg})
                except (RuntimeError, WebSocketDisconnect):
                    pass
                logger.error(f"Mimic generation failed: {error_msg}")

        finally:
            # Close interceptor and restore stdout
            interceptor.close()
            sys.stdout = original_stdout

    except WebSocketDisconnect:
        logger.debug("Client disconnected during mimic generation")
    except Exception as e:
        logger.exception("Mimic generation error")
        error_msg = format_exception_message(e)
        try:
            await websocket.send_json({"type": "error", "content": error_msg})
        except Exception:
            pass
    finally:
        # Ensure stdout is always restored
        sys.stdout = original_stdout

        # Clean up pusher task
        if pusher_task:
            try:
                pusher_task.cancel()
                await pusher_task
            except asyncio.CancelledError:
                pass  # Expected when cancelling
            except Exception:
                pass

        # Drain any remaining items in the queue
        try:
            while True:
                log_queue.get_nowait()
        except asyncio.QueueEmpty:
            pass

        # Close WebSocket (best effort: the peer may already be gone)
        try:
            await websocket.close()
        except Exception as close_error:
            logger.debug(f"WebSocket close failed: {close_error}")
327
+
328
+
329
@router.websocket("/generate")
async def websocket_question_generate(websocket: WebSocket):
    """
    WebSocket endpoint for custom question generation.

    Expects one JSON message after the connection is accepted:
    {
        "requirement": {...},   // required, must be a JSON object
        "kb_name": "...",       // optional, defaults to "ai_textbook"
        "count": 1              // optional, defaults to 1
    }

    Messages sent to the client, in order: "task_id", "status" (content
    "started"), whatever the coordinator and its logger push through the log
    queue, "token_stats", "batch_summary" and finally "complete". Failures are
    reported as {"type": "error", "content": ...}.

    Each result returned by the coordinator is also recorded in the history
    manager.
    """
    await websocket.accept()

    # Get task ID manager
    task_manager = TaskIDManager.get_instance()

    try:
        # 1. Wait for config
        data = await websocket.receive_json()
        requirement = data.get("requirement")
        kb_name = data.get("kb_name", "ai_textbook")
        count = data.get("count", 1)

        if not requirement:
            try:
                await websocket.send_json({"type": "error", "content": "Requirement is required"})
            except (RuntimeError, WebSocketDisconnect):
                pass
            return

        # The code below calls requirement.get(...); reject other JSON types
        # up front so the client gets an explicit error.
        if not isinstance(requirement, dict):
            try:
                await websocket.send_json(
                    {"type": "error", "content": "Requirement must be an object"}
                )
            except (RuntimeError, WebSocketDisconnect):
                pass
            return

        # Generate task ID
        task_key = f"question_{kb_name}_{hash(str(requirement))}"
        task_id = task_manager.generate_task_id("question_gen", task_key)

        # Send task ID to frontend
        try:
            await websocket.send_json({"type": "task_id", "task_id": task_id})
        except (RuntimeError, WebSocketDisconnect):
            logger.debug("WebSocket closed, cannot send task_id")
            return

        logger.info(
            f"[{task_id}] Starting question generation: {requirement.get('knowledge_point', 'Unknown')}"
        )

        # 2. Initialize Coordinator
        # Define unified output directory (DeepTutor/data/user/question)
        root_dir = Path(__file__).parent.parent.parent.parent
        output_base = root_dir / "data" / "user" / "question"

        # Fall back to None credentials when the LLM config cannot be loaded.
        try:
            llm_config = get_llm_config()
            api_key = llm_config.api_key
            base_url = llm_config.base_url
            api_version = getattr(llm_config, "api_version", None)
        except Exception:
            api_key = None
            base_url = None
            api_version = None

        coordinator = AgentCoordinator(
            api_key=api_key,
            base_url=base_url,
            api_version=api_version,
            kb_name=kb_name,
            max_rounds=10,
            output_dir=str(output_base),
        )

        # 3. Setup Log Queue for WebSocket streaming
        log_queue = asyncio.Queue()

        # WebSocket callback for coordinator to send structured updates
        async def ws_callback(data: dict):
            try:
                await log_queue.put(data)
            except Exception:
                pass

        coordinator.set_ws_callback(ws_callback)

        # 4. Define background pusher for logs
        async def log_pusher():
            while True:
                entry = await log_queue.get()
                try:
                    await websocket.send_json(entry)
                except Exception:
                    # The socket is unusable; stop forwarding.
                    break
                log_queue.task_done()

        pusher_task = asyncio.create_task(log_pusher())

        # 5. Setup LogInterceptor for capturing logger output (same as solve.py)
        # Get the coordinator's logger to intercept
        target_logger = coordinator.logger.logger
        interceptor = LogInterceptor(target_logger, log_queue)

        # 6. Run Generation with LogInterceptor
        try:
            with interceptor:
                try:
                    await websocket.send_json({"type": "status", "content": "started"})
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed, stopping question generation")
                    return

                # Use custom mode generation (new streamlined flow)
                logger.info(f"Starting custom mode generation for {count} question(s)")

                # Use the new custom generation method
                batch_result = await coordinator.generate_questions_custom(
                    requirement=requirement,
                    num_questions=count,
                )

                # Results are already sent via WebSocket callbacks in the coordinator
                # Just need to save each returned result to history
                for result in batch_result.get("results", []):
                    # Save to history
                    history_manager.add_entry(
                        activity_type=ActivityType.QUESTION,
                        title=f"{requirement.get('knowledge_point', 'Question')} ({requirement.get('question_type')})",
                        content={
                            "requirement": requirement,
                            "question": result.get("question", {}),
                            "validation": result.get("validation", {}),
                            "kb_name": kb_name,
                        },
                        summary=result.get("question", {}).get("question", "")[:100],
                    )

                # Send final token stats
                try:
                    await websocket.send_json(
                        {"type": "token_stats", "stats": coordinator.token_stats}
                    )
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed, stopping question generation")

                # Send batch summary
                try:
                    await websocket.send_json(
                        {
                            "type": "batch_summary",
                            "requested": batch_result.get("requested", count),
                            "completed": batch_result.get("completed", 0),
                            "failed": batch_result.get("failed", 0),
                            "plan": batch_result.get("plan", {}),
                        }
                    )
                except (RuntimeError, WebSocketDisconnect):
                    pass

                if not batch_result.get("success"):
                    logger.warning(
                        f"Question generation had failures: {batch_result.get('failed', 0)} failed"
                    )

                # Wait for any pending messages in the queue to be sent
                # Give the pusher a moment to process remaining messages.
                # Stop waiting once the pusher has exited (it breaks out of its
                # loop on a send failure): nothing would consume the queue any
                # more and this loop would otherwise never end.
                await asyncio.sleep(0.1)
                while not log_queue.empty() and not pusher_task.done():
                    await asyncio.sleep(0.05)

                # Send complete signal
                try:
                    await websocket.send_json({"type": "complete"})
                    logger.info(f"[{task_id}] Question generation completed")
                    task_manager.update_task_status(task_id, "completed")
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed, cannot send complete signal")

        except Exception as e:
            error_msg = format_exception_message(e)
            error_traceback = traceback.format_exc()
            logger.error(f"Question generation error: {error_msg}")
            logger.error(f"Error traceback:\n{error_traceback}")

            # Log additional context if available (``result`` is only bound
            # once the history loop above has started iterating)
            try:
                if "result" in locals():
                    logger.error(
                        f"Result type: {type(result)}, result keys: {result.keys() if isinstance(result, dict) else 'N/A'}"
                    )
                    if isinstance(result, dict) and "validation" in result:
                        validation = result["validation"]
                        logger.error(f"Validation type: {type(validation)}")
                        if isinstance(validation, dict):
                            logger.error(f"Validation keys: {validation.keys()}")
                            logger.error(
                                f"Issues type: {type(validation.get('issues'))}, value: {validation.get('issues')}"
                            )
                            logger.error(
                                f"Suggestions type: {type(validation.get('suggestions'))}, value: {validation.get('suggestions')}"
                            )
            except Exception as context_error:
                logger.warning(f"Failed to log error context: {context_error}")

            try:
                await websocket.send_json({"type": "error", "content": error_msg})
            except (RuntimeError, WebSocketDisconnect):
                logger.debug("WebSocket closed, cannot send error message")
            task_manager.update_task_status(task_id, "error", error=error_msg)

        finally:
            pusher_task.cancel()
            try:
                await pusher_task
            except asyncio.CancelledError:
                pass
            # Best effort: the socket may already be closed by the peer.
            try:
                await websocket.close()
            except (RuntimeError, WebSocketDisconnect):
                logger.debug("WebSocket already closed")

    except WebSocketDisconnect:
        logger.debug("Client disconnected")
    except Exception as e:
        error_msg = format_exception_message(e)
        logger.error(f"WebSocket error: {error_msg}")