realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,24 @@
1
+ """
2
+ Question Generation System
3
+
4
+ Modular question generation using specialized agents:
5
+ - RetrieveAgent: Knowledge base retrieval
6
+ - GenerateAgent: Question generation
7
+ - RelevanceAnalyzer: Question-KB relevance analysis
8
+ - AgentCoordinator: Workflow orchestration
9
+
10
+ Tools (moved to src/tools/question):
11
+ - parse_pdf_with_mineru
12
+ - extract_questions_from_paper
13
+ - mimic_exam_questions
14
+ """
15
+
16
+ from .agents import GenerateAgent, RelevanceAnalyzer, RetrieveAgent
17
+ from .coordinator import AgentCoordinator
18
+
19
+ __all__ = [
20
+ "RetrieveAgent",
21
+ "GenerateAgent",
22
+ "RelevanceAnalyzer",
23
+ "AgentCoordinator",
24
+ ]
@@ -0,0 +1,18 @@
1
+ """
2
+ Question Generation Agents
3
+
4
+ Specialized agents for question generation workflow:
5
+ - RetrieveAgent: Knowledge retrieval from KB
6
+ - GenerateAgent: Question generation
7
+ - RelevanceAnalyzer: Question-KB relevance analysis
8
+ """
9
+
10
+ from .generate_agent import GenerateAgent
11
+ from .relevance_analyzer import RelevanceAnalyzer
12
+ from .retrieve_agent import RetrieveAgent
13
+
14
+ __all__ = [
15
+ "RetrieveAgent",
16
+ "GenerateAgent",
17
+ "RelevanceAnalyzer",
18
+ ]
@@ -0,0 +1,381 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ GenerateAgent - Responsible for generating questions based on knowledge context.
5
+
6
+ Uses unified BaseAgent for LLM calls and configuration management.
7
+ """
8
+
9
+ import json
10
+ import re
11
+ from typing import Any
12
+
13
+ from src.agents.base_agent import BaseAgent
14
+
15
+
16
+ class GenerateAgent(BaseAgent):
17
+ """
18
+ Agent responsible for generating questions from knowledge context.
19
+
20
+ Responsibilities:
21
+ - Generate questions based on requirements and knowledge
22
+ - Support both custom mode (from scratch) and mimic mode (from reference)
23
+ - Output structured question JSON
24
+ """
25
+
26
def __init__(self, language: str = "en", **kwargs):
    """
    Set up the question-generation agent.

    Args:
        language: Prompt language code ("en" or "zh").
        **kwargs: Extra options forwarded unchanged to ``BaseAgent.__init__``.
    """
    # Fixed identity of this agent; only the language (plus whatever the
    # caller passes through) varies.
    base_options = {
        "module_name": "question",
        "agent_name": "generate_agent",
        "language": language,
    }
    super().__init__(**base_options, **kwargs)
44
+
45
+ async def process(
46
+ self,
47
+ requirement: dict[str, Any],
48
+ knowledge_context: str,
49
+ focus: dict[str, Any] | None = None,
50
+ reference_question: str | None = None,
51
+ ) -> dict[str, Any]:
52
+ """
53
+ Main processing: generate a question.
54
+
55
+ Args:
56
+ requirement: Question requirement dict (knowledge_point, difficulty, question_type, etc.)
57
+ knowledge_context: Retrieved knowledge summary
58
+ focus: Optional focus/angle for the question
59
+ reference_question: Optional reference question for mimic mode
60
+
61
+ Returns:
62
+ Dict with:
63
+ - success: Whether generation succeeded
64
+ - question: Generated question dict (if success)
65
+ - error: Error message (if failed)
66
+ """
67
+ self.logger.info("Starting question generation")
68
+
69
+ # Build requirements string
70
+ requirements_str = json.dumps(requirement, ensure_ascii=False, indent=2)
71
+
72
+ # Build focus string
73
+ if focus:
74
+ focus_str = f"Focus: {focus.get('focus', '')}\nType: {focus.get('type', requirement.get('question_type', 'written'))}"
75
+ else:
76
+ focus_str = f"Type: {requirement.get('question_type', 'written')}"
77
+
78
+ # Choose prompt based on mode
79
+ if reference_question:
80
+ # Mimic mode
81
+ return await self._generate_with_reference(
82
+ requirements_str=requirements_str,
83
+ knowledge_context=knowledge_context,
84
+ reference_question=reference_question,
85
+ )
86
+ else:
87
+ # Custom mode
88
+ return await self._generate_custom(
89
+ requirements_str=requirements_str,
90
+ knowledge_context=knowledge_context,
91
+ focus_str=focus_str,
92
+ knowledge_point=requirement.get("knowledge_point", ""),
93
+ )
94
+
95
async def _generate_custom(
    self,
    requirements_str: str,
    knowledge_context: str,
    focus_str: str,
    knowledge_point: str,
) -> dict[str, Any]:
    """
    Generate a custom question (not based on a reference).

    Every failure - a malformed prompt template, an LLM error, or an
    unparseable response - is reported through the returned dict rather
    than raised, so callers only have to check ``success``.

    Args:
        requirements_str: JSON string of requirements
        knowledge_context: Retrieved knowledge summary (only the first 4000
            characters are sent to the LLM; ``None`` is treated as empty)
        focus_str: Focus/angle description
        knowledge_point: Main knowledge point, copied onto the result

    Returns:
        ``{"success": True, "question": {...}}`` on success, otherwise
        ``{"success": False, "error": "<message>"}``.
    """
    system_prompt = self.get_prompt("system", "")
    # Fall back to a built-in template when the prompt file has no entry.
    user_prompt_template = self.get_prompt("generate", "") or (
        "Generate a question based on:\n"
        "Requirements: {requirements}\n"
        "Focus: {focus}\n"
        "Knowledge: {knowledge}\n\n"
        "Return JSON with question_type, question, correct_answer, explanation."
    )

    try:
        # Formatting happens inside the try block: a template with an
        # unknown placeholder or unescaped braces must surface as a failed
        # result, not as a stray KeyError/ValueError.
        user_prompt = user_prompt_template.format(
            requirements=requirements_str,
            focus=focus_str,
            # Slicing already handles short strings; `or ""` guards None.
            knowledge=(knowledge_context or "")[:4000],
        )

        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            response_format={"type": "json_object"},
            stage="generate_question",
        )

        question = self._parse_question_response(response)
        question["knowledge_point"] = knowledge_point

        self.logger.info(f"Generated {question.get('question_type', 'unknown')} question")

        return {
            "success": True,
            "question": question,
        }

    except Exception as e:
        # Deliberately broad: this is the boundary that converts any
        # generation failure into the documented error result.
        self.logger.error(f"Question generation failed: {e}")
        return {
            "success": False,
            "error": str(e),
        }
159
+
160
async def _generate_with_reference(
    self,
    requirements_str: str,
    knowledge_context: str,
    reference_question: str,
) -> dict[str, Any]:
    """
    Generate a question based on a reference (mimic mode).

    Every failure - a malformed prompt template, an LLM error, or an
    unparseable response - is reported through the returned dict rather
    than raised, so callers only have to check ``success``.

    Args:
        requirements_str: JSON string of requirements
        knowledge_context: Retrieved knowledge summary (only the first 4000
            characters are sent to the LLM; ``None`` is treated as empty)
        reference_question: Reference question text

    Returns:
        ``{"success": True, "question": {...}}`` on success, otherwise
        ``{"success": False, "error": "<message>"}``.
    """
    system_prompt = self.get_prompt("system", "")
    # Fall back to a built-in template when the prompt file has no entry.
    user_prompt_template = self.get_prompt("generate_with_reference", "") or (
        "Generate a new question inspired by the reference but distinct:\n"
        "Reference: {reference_question}\n"
        "Requirements: {requirements}\n"
        "Knowledge: {knowledge}\n\n"
        "Return JSON with question_type, question, correct_answer, explanation."
    )

    try:
        # Formatting happens inside the try block: a template with an
        # unknown placeholder or unescaped braces must surface as a failed
        # result, not as a stray KeyError/ValueError.
        user_prompt = user_prompt_template.format(
            reference_question=reference_question,
            requirements=requirements_str,
            # Slicing already handles short strings; `or ""` guards None.
            knowledge=(knowledge_context or "")[:4000],
        )

        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            response_format={"type": "json_object"},
            stage="generate_with_reference",
        )

        question = self._parse_question_response(response)

        self.logger.info(f"Generated mimic {question.get('question_type', 'unknown')} question")

        return {
            "success": True,
            "question": question,
        }

    except Exception as e:
        # Deliberately broad: this is the boundary that converts any
        # generation failure into the documented error result.
        self.logger.error(f"Reference-based generation failed: {e}")
        return {
            "success": False,
            "error": str(e),
        }
221
+
222
def _parse_question_response(self, response: str) -> dict[str, Any]:
    """
    Parse an LLM response into a validated question dict.

    The response is unwrapped from a markdown code fence (if present) and
    stripped of stray control characters. It is then decoded by trying
    progressively more lenient candidates until one yields a JSON object:

    1. the cleaned text as-is;
    2. the outermost ``{...}`` span found in it (drops surrounding prose);
    3. each of the above after ``_fix_common_json_issues`` (triple-quoted
       strings, trailing commas).

    Decoding uses ``strict=False`` so that raw newlines/tabs inside string
    values (frequent in multi-line or LaTeX-heavy answers) do not abort
    parsing.

    After decoding, a missing ``question_type`` defaults to ``"written"``
    and, for ``"choice"`` questions, ``options`` is normalized to a dict
    (placeholders when missing, letters A-D assigned to list items).

    Args:
        response: LLM response string

    Returns:
        Parsed question dict

    Raises:
        ValueError: If the response is empty, cannot be decoded into a JSON
            object, or lacks the ``question`` field.
    """
    if not response or not response.strip():
        raise ValueError("LLM returned empty response")

    json_content = self._clean_json_string(self._extract_json_from_markdown(response))

    # Candidate texts, least-modified first.
    candidates = [json_content]
    object_match = re.search(r"\{[\s\S]*\}", json_content)
    if object_match and object_match.group(0) != json_content:
        candidates.append(object_match.group(0))
    # Repairs are applied to both the full text and the extracted object so
    # that "prose + trailing comma" responses are still recoverable.
    candidates += [self._fix_common_json_issues(text) for text in candidates]

    question = None
    parse_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate, strict=False)
        except json.JSONDecodeError as e:
            # Keep the first error: it describes the least-modified text.
            if parse_error is None:
                parse_error = e
            continue
        # Only a JSON object is a usable question; a bare string, number or
        # list falls through to the next candidate.
        if isinstance(parsed, dict):
            question = parsed
            break

    if question is None:
        if parse_error is None:
            raise ValueError("Question response is not a JSON object")
        raise ValueError(f"Failed to parse question JSON: {parse_error}") from parse_error

    # Validate required fields
    if "question" not in question:
        raise ValueError("Question response missing 'question' field")

    # Ensure question_type exists
    question.setdefault("question_type", "written")

    # Validate options for choice questions
    if question.get("question_type") == "choice":
        options = question.get("options")
        if not options:
            # Create default options if missing
            self.logger.warning("Choice question missing options, adding placeholder")
            question["options"] = {
                "A": "Option A (placeholder)",
                "B": "Option B (placeholder)",
                "C": "Option C (placeholder)",
                "D": "Option D (placeholder)",
            }
        elif not isinstance(options, dict):
            # Convert to dict if it's a list or other format
            self.logger.warning(f"Options is not a dict: {type(options)}, converting")
            if isinstance(options, list):
                # Letters A-D; anything past the fourth item is dropped.
                question["options"] = {
                    chr(65 + i): str(opt) for i, opt in enumerate(options[:4])
                }
            else:
                question["options"] = {"A": str(options)}
        elif len(options) < 2:
            self.logger.warning(f"Choice question has only {len(options)} options")

    return question
314
+
315
def _clean_json_string(self, json_str: str) -> str:
    """
    Drop control characters that commonly break JSON parsing.

    Removes code points 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F-0x9F.
    Tab, newline and carriage return are kept; nothing is escaped.
    Falsy input (empty string, None) is returned unchanged.
    """
    if not json_str:
        return json_str

    # Deletion table: every listed code point maps to None.
    unwanted = (
        *range(0x00, 0x09),
        0x0B,
        0x0C,
        *range(0x0E, 0x20),
        *range(0x7F, 0xA0),
    )
    return json_str.translate(dict.fromkeys(unwanted))
331
+
332
def _fix_common_json_issues(self, content: str) -> str:
    """
    Attempt to repair common JSON mistakes in LLM output.

    Fixes:
    - Python triple-quoted strings are converted to JSON strings
    - Trailing commas before a closing brace/bracket are removed

    The trailing-comma pass is string-aware: text such as ``", }"`` that
    appears inside a string value is left untouched, so only structural
    commas are removed.

    Args:
        content: Candidate JSON text (falsy input is returned unchanged)

    Returns:
        The repaired text; it is not guaranteed to be valid JSON.
    """
    if not content:
        return content

    # Fix Python triple-quoted strings (LLMs sometimes generate these);
    # json.dumps escapes quotes and newlines in the inner text.
    content = re.sub(
        r'"""([\s\S]*?)"""',
        lambda m: json.dumps(m.group(1)),
        content,
    )

    # Alternative 1 consumes whole string literals (escape-aware) so that
    # alternative 2 - a comma followed by optional whitespace and } or ] -
    # can only match outside of strings.
    token = re.compile(r'("(?:\\[\s\S]|[^"\\])*")|,\s*([}\]])')

    def keep_string_or_drop_comma(m: re.Match) -> str:
        literal = m.group(1)
        return literal if literal is not None else m.group(2)

    return token.sub(keep_string_or_drop_comma, content)
355
+
356
def _extract_json_from_markdown(self, content: str) -> str:
    """
    Pull the payload out of a markdown code fence.

    LLMs often wrap JSON in ```json ... ``` blocks. When at least one fenced
    block is present, the stripped contents of the first one are returned;
    otherwise the whole input is returned stripped.

    Args:
        content: Raw LLM response

    Returns:
        Extracted JSON string (falsy input is returned unchanged)
    """
    if not content:
        return content

    # Only the first fenced block matters, so a single search is enough.
    first_block = re.search(r"```(?:json)?\s*\n?(.*?)```", content, re.DOTALL)
    payload = first_block.group(1) if first_block else content
    return payload.strip()
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ RelevanceAnalyzer - Analyzes the relevance between questions and knowledge base.
5
+
6
+ Replaces the old validation workflow with a single-pass relevance analysis.
7
+ No iterative validation or rejection - all questions are accepted and analyzed.
8
+ """
9
+
10
+ import json
11
+ import re
12
+ from typing import Any
13
+
14
+ from src.agents.base_agent import BaseAgent
15
+
16
+
17
+ class RelevanceAnalyzer(BaseAgent):
18
+ """
19
+ Agent responsible for analyzing question-knowledge relevance.
20
+
21
+ Key difference from old ValidationWorkflow:
22
+ - NO rejection: all questions are accepted
23
+ - NO iteration: single-pass analysis
24
+ - Output: relevance level (high/partial) with explanations
25
+
26
+ Responsibilities:
27
+ - Analyze how well a question aligns with knowledge base content
28
+ - Identify what KB concepts the question tests
29
+ - Identify any extensions beyond the KB (for "partial" relevance)
30
+ """
31
+
32
def __init__(self, language: str = "en", **kwargs):
    """
    Set up the relevance-analysis agent.

    Args:
        language: Prompt language code ("en" or "zh").
        **kwargs: Extra options forwarded unchanged to ``BaseAgent.__init__``.
    """
    # Fixed identity of this agent; only the language (plus whatever the
    # caller passes through) varies.
    base_options = {
        "module_name": "question",
        "agent_name": "relevance_analyzer",
        "language": language,
    }
    super().__init__(**base_options, **kwargs)
50
+
51
async def process(
    self,
    question: dict[str, Any],
    knowledge_context: str,
) -> dict[str, Any]:
    """
    Analyze how well a question is covered by the knowledge context.

    This method never rejects a question and never raises: any failure
    (unserializable question, malformed prompt template, LLM error,
    unparseable response) yields the default "partial" result with the
    error text in ``extension_points``.

    Args:
        question: Generated question dict
        knowledge_context: Retrieved knowledge summary (truncated to 4000
            characters plus a marker; ``None`` is treated as empty)

    Returns:
        Dict with:
            - relevance: "high" or "partial"
            - kb_coverage: Description of KB content tested
            - extension_points: Description of extensions (only if partial)
    """
    self.logger.info("Starting relevance analysis")

    try:
        # Everything that can fail lives inside the try block so the
        # "always returns an analysis" contract holds.
        question_str = json.dumps(question, ensure_ascii=False, indent=2)

        # Truncate context if too long
        knowledge_context = knowledge_context or ""
        if len(knowledge_context) > 4000:
            knowledge_context = knowledge_context[:4000] + "...[truncated]"

        system_prompt = self.get_prompt("system", "")
        # Fall back to a built-in template when the prompt file has no entry.
        user_prompt_template = self.get_prompt("analyze_relevance", "") or (
            "Analyze the relevance between this question and knowledge base:\n\n"
            "Question:\n{question}\n\n"
            "Knowledge Base:\n{knowledge}\n\n"
            "Return JSON with: relevance (high/partial), kb_coverage, extension_points"
        )

        user_prompt = user_prompt_template.format(
            question=question_str,
            knowledge=knowledge_context,
        )

        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            response_format={"type": "json_object"},
            temperature=0.3,  # Lower temperature for more consistent analysis
            stage="analyze_relevance",
        )

        result = self._parse_analysis_response(response)

        self.logger.info(f"Relevance analysis completed: {result['relevance']}")

        return result

    except Exception as e:
        # Deliberately broad: analysis is best-effort and must not block
        # the question from being accepted.
        self.logger.warning(f"Relevance analysis failed: {e}")
        # Return default "partial" on failure
        return {
            "relevance": "partial",
            "kb_coverage": "Unable to analyze knowledge base coverage",
            "extension_points": f"Analysis could not be completed: {e}",
        }
119
+
120
def _parse_analysis_response(self, response: str) -> dict[str, Any]:
    """
    Parse an LLM response into a normalized analysis result.

    The response is unwrapped from a markdown code fence (if present) and
    stripped of stray control characters, then decoded first as-is and, if
    that does not yield a JSON object, from the outermost ``{...}`` span.
    Decoding uses ``strict=False`` so raw newlines/tabs inside string values
    do not abort parsing.

    The ``relevance`` value is compared case-insensitively and ignoring
    surrounding whitespace; anything other than "high" or "partial"
    (including non-string values) becomes "partial". ``extension_points``
    is only reported for "partial" results.

    Args:
        response: LLM response string

    Returns:
        Dict with ``relevance``, ``kb_coverage`` and ``extension_points``

    Raises:
        ValueError: If the response is empty or cannot be decoded into a
            JSON object.
    """
    if not response or not response.strip():
        raise ValueError("LLM returned empty response")

    json_content = self._clean_json_string(self._extract_json_from_markdown(response))

    # Candidate texts, least-modified first.
    candidates = [json_content]
    object_match = re.search(r"\{[\s\S]*\}", json_content)
    if object_match and object_match.group(0) != json_content:
        candidates.append(object_match.group(0))

    result = None
    parse_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate, strict=False)
        except json.JSONDecodeError as e:
            # Keep the first error: it describes the least-modified text.
            if parse_error is None:
                parse_error = e
            continue
        # A bare string/number/list has no fields to read; keep looking.
        if isinstance(parsed, dict):
            result = parsed
            break

    if result is None:
        if parse_error is None:
            raise ValueError("Analysis response is not a JSON object")
        raise ValueError(f"Failed to parse analysis JSON: {parse_error}") from parse_error

    # Normalize relevance value ("High", " high " -> "high")
    raw_relevance = result.get("relevance", "partial")
    relevance = raw_relevance.strip().lower() if isinstance(raw_relevance, str) else "partial"
    if relevance not in ("high", "partial"):
        relevance = "partial"

    return {
        "relevance": relevance,
        "kb_coverage": result.get("kb_coverage", ""),
        "extension_points": result.get("extension_points", "") if relevance == "partial" else "",
    }
177
+
178
def _clean_json_string(self, json_str: str) -> str:
    """
    Strip control characters that would make JSON decoding fail.

    Tab, newline and carriage return survive; falsy input is returned
    unchanged.
    """
    if json_str:
        # C0 controls (minus \t, \n, \r), DEL and the C1 control range.
        control_chars = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]")
        return control_chars.sub("", json_str)
    return json_str
186
+
187
def _extract_json_from_markdown(self, content: str) -> str:
    """
    Return the JSON payload of an LLM response.

    If the response contains a fenced code block (``` or ```json), the
    stripped body of the first such block is returned; otherwise the whole
    response is returned stripped.

    Args:
        content: Raw LLM response

    Returns:
        Extracted JSON string (falsy input is returned unchanged)
    """
    if not content:
        return content

    fence = re.compile(r"```(?:json)?\s*\n?(.*?)```", re.DOTALL)
    hit = fence.search(content)
    if hit is None:
        # No code fence: the response may already be bare JSON.
        return content.strip()
    return hit.group(1).strip()