realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Question Generation System
|
|
3
|
+
|
|
4
|
+
Modular question generation using specialized agents:
|
|
5
|
+
- RetrieveAgent: Knowledge base retrieval
|
|
6
|
+
- GenerateAgent: Question generation
|
|
7
|
+
- RelevanceAnalyzer: Question-KB relevance analysis
|
|
8
|
+
- AgentCoordinator: Workflow orchestration
|
|
9
|
+
|
|
10
|
+
Tools (moved to src/tools/question):
|
|
11
|
+
- parse_pdf_with_mineru
|
|
12
|
+
- extract_questions_from_paper
|
|
13
|
+
- mimic_exam_questions
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from .agents import GenerateAgent, RelevanceAnalyzer, RetrieveAgent
|
|
17
|
+
from .coordinator import AgentCoordinator
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"RetrieveAgent",
|
|
21
|
+
"GenerateAgent",
|
|
22
|
+
"RelevanceAnalyzer",
|
|
23
|
+
"AgentCoordinator",
|
|
24
|
+
]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Question Generation Agents
|
|
3
|
+
|
|
4
|
+
Specialized agents for question generation workflow:
|
|
5
|
+
- RetrieveAgent: Knowledge retrieval from KB
|
|
6
|
+
- GenerateAgent: Question generation
|
|
7
|
+
- RelevanceAnalyzer: Question-KB relevance analysis
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .generate_agent import GenerateAgent
|
|
11
|
+
from .relevance_analyzer import RelevanceAnalyzer
|
|
12
|
+
from .retrieve_agent import RetrieveAgent
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"RetrieveAgent",
|
|
16
|
+
"GenerateAgent",
|
|
17
|
+
"RelevanceAnalyzer",
|
|
18
|
+
]
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
GenerateAgent - Responsible for generating questions based on knowledge context.
|
|
5
|
+
|
|
6
|
+
Uses unified BaseAgent for LLM calls and configuration management.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from src.agents.base_agent import BaseAgent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GenerateAgent(BaseAgent):
    """
    Agent responsible for generating questions from knowledge context.

    Responsibilities:
    - Generate questions based on requirements and knowledge
    - Support both custom mode (from scratch) and mimic mode (from reference)
    - Output structured question JSON
    """

    # Maximum number of knowledge-context characters forwarded to the LLM.
    # Longer contexts are truncated to keep the prompt within budget.
    _MAX_KNOWLEDGE_CHARS = 4000

    def __init__(
        self,
        language: str = "en",
        **kwargs,
    ):
        """
        Initialize GenerateAgent.

        Args:
            language: Language for prompts ("en" or "zh")
            **kwargs: Additional arguments passed to BaseAgent
        """
        super().__init__(
            module_name="question",
            agent_name="generate_agent",
            language=language,
            **kwargs,
        )

    async def process(
        self,
        requirement: dict[str, Any],
        knowledge_context: str,
        focus: dict[str, Any] | None = None,
        reference_question: str | None = None,
    ) -> dict[str, Any]:
        """
        Main processing: generate a question.

        Args:
            requirement: Question requirement dict (knowledge_point, difficulty, question_type, etc.)
            knowledge_context: Retrieved knowledge summary
            focus: Optional focus/angle for the question (used in custom mode only)
            reference_question: Optional reference question for mimic mode

        Returns:
            Dict with:
            - success: Whether generation succeeded
            - question: Generated question dict (if success)
            - error: Error message (if failed)
        """
        self.logger.info("Starting question generation")

        # Requirements are serialized once and embedded into the prompt as JSON.
        requirements_str = json.dumps(requirement, ensure_ascii=False, indent=2)

        # Build the focus string; the question type falls back to "written".
        if focus:
            focus_str = f"Focus: {focus.get('focus', '')}\nType: {focus.get('type', requirement.get('question_type', 'written'))}"
        else:
            focus_str = f"Type: {requirement.get('question_type', 'written')}"

        # A reference question switches the agent into mimic mode.
        if reference_question:
            return await self._generate_with_reference(
                requirements_str=requirements_str,
                knowledge_context=knowledge_context,
                reference_question=reference_question,
            )
        else:
            return await self._generate_custom(
                requirements_str=requirements_str,
                knowledge_context=knowledge_context,
                focus_str=focus_str,
                knowledge_point=requirement.get("knowledge_point", ""),
            )

    def _truncate_knowledge(self, knowledge_context: str) -> str:
        """Clip the knowledge context to the prompt character budget."""
        if len(knowledge_context) > self._MAX_KNOWLEDGE_CHARS:
            return knowledge_context[: self._MAX_KNOWLEDGE_CHARS]
        return knowledge_context

    async def _call_and_parse(
        self,
        user_prompt: str,
        system_prompt: str,
        stage: str,
    ) -> dict[str, Any]:
        """
        Call the LLM requesting a JSON object and parse the result.

        Shared by both generation modes so the LLM-call configuration and
        parsing stay in one place.

        Args:
            user_prompt: Fully formatted user prompt
            system_prompt: System prompt (may be empty)
            stage: Stage label forwarded to call_llm for telemetry

        Returns:
            Parsed question dict

        Raises:
            Exception: Propagates any LLM-call or parse failure to the caller.
        """
        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            response_format={"type": "json_object"},
            stage=stage,
        )
        return self._parse_question_response(response)

    async def _generate_custom(
        self,
        requirements_str: str,
        knowledge_context: str,
        focus_str: str,
        knowledge_point: str,
    ) -> dict[str, Any]:
        """
        Generate a custom question (not based on reference).

        Args:
            requirements_str: JSON string of requirements
            knowledge_context: Retrieved knowledge summary
            focus_str: Focus/angle description
            knowledge_point: Main knowledge point, attached to the result

        Returns:
            Dict with success status and question/error
        """
        system_prompt = self.get_prompt("system", "")
        user_prompt_template = self.get_prompt("generate", "")

        if not user_prompt_template:
            # Fallback prompt when no template is configured for this language.
            user_prompt_template = (
                "Generate a question based on:\n"
                "Requirements: {requirements}\n"
                "Focus: {focus}\n"
                "Knowledge: {knowledge}\n\n"
                "Return JSON with question_type, question, correct_answer, explanation."
            )

        user_prompt = user_prompt_template.format(
            requirements=requirements_str,
            focus=focus_str,
            knowledge=self._truncate_knowledge(knowledge_context),
        )

        try:
            question = await self._call_and_parse(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                stage="generate_question",
            )
            # Tag the question with its originating knowledge point.
            question["knowledge_point"] = knowledge_point

            self.logger.info(f"Generated {question.get('question_type', 'unknown')} question")

            return {
                "success": True,
                "question": question,
            }

        except Exception as e:
            self.logger.error(f"Question generation failed: {e}")
            return {
                "success": False,
                "error": str(e),
            }

    async def _generate_with_reference(
        self,
        requirements_str: str,
        knowledge_context: str,
        reference_question: str,
    ) -> dict[str, Any]:
        """
        Generate a question based on a reference (mimic mode).

        Args:
            requirements_str: JSON string of requirements
            knowledge_context: Retrieved knowledge summary
            reference_question: Reference question text

        Returns:
            Dict with success status and question/error
        """
        system_prompt = self.get_prompt("system", "")
        user_prompt_template = self.get_prompt("generate_with_reference", "")

        if not user_prompt_template:
            # Fallback prompt when no template is configured for this language.
            user_prompt_template = (
                "Generate a new question inspired by the reference but distinct:\n"
                "Reference: {reference_question}\n"
                "Requirements: {requirements}\n"
                "Knowledge: {knowledge}\n\n"
                "Return JSON with question_type, question, correct_answer, explanation."
            )

        user_prompt = user_prompt_template.format(
            reference_question=reference_question,
            requirements=requirements_str,
            knowledge=self._truncate_knowledge(knowledge_context),
        )

        try:
            question = await self._call_and_parse(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                stage="generate_with_reference",
            )

            self.logger.info(f"Generated mimic {question.get('question_type', 'unknown')} question")

            return {
                "success": True,
                "question": question,
            }

        except Exception as e:
            self.logger.error(f"Reference-based generation failed: {e}")
            return {
                "success": False,
                "error": str(e),
            }

    def _parse_question_response(self, response: str) -> dict[str, Any]:
        """
        Parse LLM response into question dict.

        Uses robust JSON extraction that handles:
        - Markdown code blocks
        - Control characters in LaTeX formulas
        - Python triple-quoted strings
        - Partial JSON extraction

        Args:
            response: LLM response string

        Returns:
            Parsed question dict

        Raises:
            ValueError: If parsing fails or the 'question' field is missing
        """
        if not response or not response.strip():
            raise ValueError("LLM returned empty response")

        # Try to extract JSON from markdown code blocks if present.
        json_content = self._extract_json_from_markdown(response)

        # Clean control characters that may break JSON parsing.
        json_content = self._clean_json_string(json_content)

        # Try multiple parsing strategies, from strict to lenient.
        question = None
        parse_error = None

        # Strategy 1: Direct parse.
        try:
            question = json.loads(json_content)
        except json.JSONDecodeError as e:
            parse_error = e

        # Strategy 2: Try extracting the outermost JSON-object-looking span.
        if question is None:
            json_obj_pattern = re.compile(r"\{[\s\S]*\}")
            match = json_obj_pattern.search(json_content)
            if match:
                try:
                    question = json.loads(match.group(0))
                except json.JSONDecodeError:
                    pass

        # Strategy 3: Try fixing common LLM JSON issues.
        if question is None:
            try:
                fixed_content = self._fix_common_json_issues(json_content)
                question = json.loads(fixed_content)
            except json.JSONDecodeError:
                pass

        if question is None:
            # Report the first (direct-parse) error; it is the most informative.
            raise ValueError(f"Failed to parse question JSON: {parse_error}") from parse_error

        # Validate required fields.
        if "question" not in question:
            raise ValueError("Question response missing 'question' field")

        # Ensure question_type exists; default to a written-answer question.
        if "question_type" not in question:
            question["question_type"] = "written"

        # Validate options for choice questions: normalize to a dict keyed
        # by letter, filling placeholders rather than rejecting the question.
        if question.get("question_type") == "choice":
            options = question.get("options")
            if not options:
                # Create default options if missing.
                self.logger.warning("Choice question missing options, adding placeholder")
                question["options"] = {
                    "A": "Option A (placeholder)",
                    "B": "Option B (placeholder)",
                    "C": "Option C (placeholder)",
                    "D": "Option D (placeholder)",
                }
            elif not isinstance(options, dict):
                # Convert to dict if it's a list or other format.
                self.logger.warning(f"Options is not a dict: {type(options)}, converting")
                if isinstance(options, list):
                    # Map the first four entries to letters A-D.
                    question["options"] = {
                        chr(65 + i): str(opt) for i, opt in enumerate(options[:4])
                    }
                else:
                    question["options"] = {"A": str(options)}
            elif len(options) < 2:
                self.logger.warning(f"Choice question has only {len(options)} options")

        return question

    def _clean_json_string(self, json_str: str) -> str:
        """
        Clean JSON string by removing/escaping problematic characters.

        Handles:
        - Control characters (0x00-0x1f except tab, newline, carriage return)
        - C1 control range (0x7f-0x9f) which also breaks json.loads
        """
        if not json_str:
            return json_str

        # Remove most control characters but keep \t, \n, \r.
        # These can appear in LLM output and break JSON parsing.
        cleaned = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]", "", json_str)

        return cleaned

    def _fix_common_json_issues(self, content: str) -> str:
        """
        Attempt to fix common JSON issues from LLM output.

        Fixes:
        - Python triple-quoted strings converted to JSON strings
        - Trailing commas before closing braces/brackets
        """
        if not content:
            return content

        # Fix Python triple-quoted strings (LLMs sometimes generate these).
        def replace_triple_quotes(match: re.Match) -> str:
            inner = match.group(1)
            # Use json.dumps to properly escape the content.
            return json.dumps(inner)

        content = re.sub(r'"""([\s\S]*?)"""', replace_triple_quotes, content)

        # Remove trailing commas before } or ].
        content = re.sub(r",\s*([}\]])", r"\1", content)

        return content

    def _extract_json_from_markdown(self, content: str) -> str:
        """
        Extract JSON from markdown code blocks.

        LLMs often wrap JSON in ```json ... ``` blocks. This method strips
        the markdown formatting and any surrounding text.

        Args:
            content: Raw LLM response

        Returns:
            Extracted JSON string
        """
        if not content:
            return content

        # Try to find a fenced code block (with or without a "json" tag).
        json_block_pattern = r"```(?:json)?\s*\n?(.*?)```"
        matches = re.findall(json_block_pattern, content, re.DOTALL)

        if matches:
            # Return the content inside the first code block.
            return matches[0].strip()

        # If no code blocks found, return as-is (might already be valid JSON).
        return content.strip()
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
RelevanceAnalyzer - Analyzes the relevance between questions and knowledge base.
|
|
5
|
+
|
|
6
|
+
Replaces the old validation workflow with a single-pass relevance analysis.
|
|
7
|
+
No iterative validation or rejection - all questions are accepted and analyzed.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from src.agents.base_agent import BaseAgent
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RelevanceAnalyzer(BaseAgent):
    """
    Agent responsible for analyzing question-knowledge relevance.

    Key difference from old ValidationWorkflow:
    - NO rejection: all questions are accepted
    - NO iteration: single-pass analysis
    - Output: relevance level (high/partial) with explanations

    Responsibilities:
    - Analyze how well a question aligns with knowledge base content
    - Identify what KB concepts the question tests
    - Identify any extensions beyond the KB (for "partial" relevance)
    """

    # Control characters (except \t, \n, \r) that commonly break json.loads
    # on raw LLM output. Compiled once instead of per call.
    _CONTROL_CHARS_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]")
    # Greedy match from the first "{" to the last "}" — fallback extraction
    # when the model wraps the JSON object in extra prose.
    _JSON_OBJECT_RE = re.compile(r"\{[\s\S]*\}")
    # Fenced markdown code block, optionally tagged "json".
    _CODE_BLOCK_RE = re.compile(r"```(?:json)?\s*\n?(.*?)```", re.DOTALL)

    def __init__(
        self,
        language: str = "en",
        max_context_chars: int = 4000,
        **kwargs,
    ):
        """
        Initialize RelevanceAnalyzer.

        Args:
            language: Language for prompts ("en" or "zh")
            max_context_chars: Maximum knowledge-context length (in characters)
                included in the LLM prompt; longer contexts are truncated.
                Default preserves the previous hard-coded 4000 limit.
            **kwargs: Additional arguments passed to BaseAgent
        """
        super().__init__(
            module_name="question",
            agent_name="relevance_analyzer",
            language=language,
            **kwargs,
        )
        self.max_context_chars = max_context_chars

    async def process(
        self,
        question: dict[str, Any],
        knowledge_context: str,
    ) -> dict[str, Any]:
        """
        Main processing: analyze relevance between question and knowledge.

        Never raises on analysis failure — questions are always accepted,
        so any error degrades to a default "partial" result.

        Args:
            question: Generated question dict
            knowledge_context: Retrieved knowledge summary

        Returns:
            Dict with:
            - relevance: "high" or "partial"
            - kb_coverage: Description of KB content tested
            - extension_points: Description of extensions (only if partial)
        """
        self.logger.info("Starting relevance analysis")

        # Format question for analysis
        question_str = json.dumps(question, ensure_ascii=False, indent=2)

        # Truncate context so the prompt stays within a predictable budget
        if len(knowledge_context) > self.max_context_chars:
            knowledge_context = (
                knowledge_context[: self.max_context_chars] + "...[truncated]"
            )

        # Get prompts
        system_prompt = self.get_prompt("system", "")
        user_prompt_template = self.get_prompt("analyze_relevance", "")

        if not user_prompt_template:
            # Fallback prompt used when the prompt file is missing or empty
            user_prompt_template = (
                "Analyze the relevance between this question and knowledge base:\n\n"
                "Question:\n{question}\n\n"
                "Knowledge Base:\n{knowledge}\n\n"
                "Return JSON with: relevance (high/partial), kb_coverage, extension_points"
            )

        user_prompt = user_prompt_template.format(
            question=question_str,
            knowledge=knowledge_context,
        )

        try:
            response = await self.call_llm(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                response_format={"type": "json_object"},
                temperature=0.3,  # Lower temperature for more consistent analysis
                stage="analyze_relevance",
            )

            result = self._parse_analysis_response(response)

            self.logger.info(f"Relevance analysis completed: {result['relevance']}")

            return result

        except Exception as e:
            self.logger.warning(f"Relevance analysis failed: {e}")
            # Return default "partial" on failure — never reject the question
            return {
                "relevance": "partial",
                "kb_coverage": "Unable to analyze knowledge base coverage",
                "extension_points": f"Analysis could not be completed: {e}",
            }

    def _parse_analysis_response(self, response: str) -> dict[str, Any]:
        """
        Parse LLM response into analysis result.

        Uses robust JSON extraction that handles control characters
        and common LLM output issues.

        Args:
            response: LLM response string

        Returns:
            Parsed analysis dict with normalized relevance value

        Raises:
            ValueError: If the response is empty, not parseable as JSON,
                or parses to something other than a JSON object.
        """
        if not response or not response.strip():
            raise ValueError("LLM returned empty response")

        # Try to extract JSON from markdown code blocks if present
        json_content = self._extract_json_from_markdown(response)

        # Clean control characters
        json_content = self._clean_json_string(json_content)

        # Try multiple parsing strategies
        result = None
        parse_error = None

        # Strategy 1: Direct parse
        try:
            result = json.loads(json_content)
        except json.JSONDecodeError as e:
            parse_error = e

        # Strategy 2: Extract JSON object pattern from surrounding text
        if result is None:
            match = self._JSON_OBJECT_RE.search(json_content)
            if match:
                try:
                    result = json.loads(match.group(0))
                except json.JSONDecodeError:
                    pass

        if result is None:
            raise ValueError(f"Failed to parse analysis JSON: {parse_error}") from parse_error

        # Guard against valid-but-wrong JSON (e.g. a top-level list or string):
        # without this, result.get() below raises AttributeError with a
        # misleading message.
        if not isinstance(result, dict):
            raise ValueError(
                f"Expected a JSON object, got {type(result).__name__}"
            )

        # Normalize relevance value case-insensitively ("High" -> "high");
        # anything outside the known set falls back to "partial".
        relevance = str(result.get("relevance", "partial")).strip().lower()
        if relevance not in ("high", "partial"):
            relevance = "partial"

        return {
            "relevance": relevance,
            "kb_coverage": result.get("kb_coverage", ""),
            # Extension points are only meaningful for partially-covered questions
            "extension_points": result.get("extension_points", "")
            if relevance == "partial"
            else "",
        }

    def _clean_json_string(self, json_str: str) -> str:
        """
        Clean JSON string by removing problematic control characters.

        Keeps tab, newline, and carriage return; strips everything else in
        the C0/C1 control ranges, which json.loads rejects inside strings.
        """
        if not json_str:
            return json_str
        return self._CONTROL_CHARS_RE.sub("", json_str)

    def _extract_json_from_markdown(self, content: str) -> str:
        """
        Extract JSON from markdown code blocks.

        Args:
            content: Raw LLM response

        Returns:
            Extracted JSON string (the first fenced block if any, otherwise
            the stripped input — it might already be valid JSON)
        """
        if not content:
            return content

        matches = self._CODE_BLOCK_RE.findall(content)
        if matches:
            # Return the content inside the first code block
            return matches[0].strip()

        # If no code blocks found, return as-is (might already be valid JSON)
        return content.strip()
|