realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,537 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import base64
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
import traceback
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
|
10
|
+
|
|
11
|
+
from src.agents.question import AgentCoordinator
|
|
12
|
+
from src.api.utils.history import ActivityType, history_manager
|
|
13
|
+
from src.api.utils.log_interceptor import LogInterceptor
|
|
14
|
+
from src.api.utils.task_id_manager import TaskIDManager
|
|
15
|
+
from src.tools.question import mimic_exam_questions
|
|
16
|
+
from src.utils.document_validator import DocumentValidator
|
|
17
|
+
from src.utils.error_utils import format_exception_message
|
|
18
|
+
|
|
19
|
+
# Add project root for imports
|
|
20
|
+
project_root = Path(__file__).parent.parent.parent.parent
|
|
21
|
+
sys.path.insert(0, str(project_root))
|
|
22
|
+
|
|
23
|
+
from src.logging import get_logger
|
|
24
|
+
from src.services.config import load_config_with_main
|
|
25
|
+
from src.services.llm.config import get_llm_config
|
|
26
|
+
|
|
27
|
+
# Module-level wiring for the question router: resolve the project root,
# load question_config.yaml, build the module logger from the configured
# log directory, and declare the router plus the mimic output location.
project_root = Path(__file__).parent.parent.parent.parent
config = load_config_with_main("question_config.yaml", project_root)

# Prefer paths.user_log_dir; fall back to logging.log_dir when it is unset/empty.
log_dir = config.get("paths", {}).get("user_log_dir")
if not log_dir:
    log_dir = config.get("logging", {}).get("log_dir")
logger = get_logger("QuestionAPI", log_dir=log_dir)

router = APIRouter()

# Mimic mode writes its per-session batch folders under data/user/question.
PROJECT_ROOT = project_root
MIMIC_OUTPUT_DIR = PROJECT_ROOT.joinpath("data", "user", "question", "mimic_papers")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@router.websocket("/mimic")
async def websocket_mimic_generate(websocket: WebSocket):
    """
    WebSocket endpoint for mimic exam paper question generation.

    Supports two modes:
    1. Upload PDF directly via WebSocket (base64 encoded)
    2. Use a pre-parsed paper directory path

    Message format for PDF upload:
    {
        "mode": "upload",
        "pdf_data": "base64_encoded_pdf_content",
        "pdf_name": "exam.pdf",
        "kb_name": "knowledge_base_name",
        "max_questions": 5  // optional
    }

    Message format for pre-parsed:
    {
        "mode": "parsed",
        "paper_path": "directory_name",
        "kb_name": "knowledge_base_name",
        "max_questions": 5  // optional
    }

    Messages sent back to the client are JSON objects with a "type" field:
    "status" (stages init / upload / parsing / processing), "log" (captured
    stdout lines), whatever event types the workflow emits through the
    progress callback, and finally either "complete" or "error".

    While the workflow runs, ``sys.stdout`` is replaced by an interceptor that
    tees every write to the real stdout and to the client; it is restored
    before the function returns.
    NOTE(review): sys.stdout is process-global, so overlapping sessions will
    likely capture each other's prints - confirm whether that is acceptable.
    """
    await websocket.accept()

    pusher_task = None
    # Bound up-front so the cleanup in ``finally`` never touches an unbound name
    # when the very first receive fails.
    log_queue = None
    original_stdout = sys.stdout

    try:
        # 1. Wait for config
        data = await websocket.receive_json()
        mode = data.get("mode", "parsed")  # "upload" or "parsed"
        kb_name = data.get("kb_name", "ai_textbook")
        max_questions = data.get("max_questions")

        logger.info(f"Starting mimic generation (mode: {mode}, kb: {kb_name})")

        # 2. Setup Log Queue
        log_queue = asyncio.Queue()

        async def log_pusher():
            """Forward queued log entries to the client until a send fails."""
            while True:
                entry = await log_queue.get()
                try:
                    await websocket.send_json(entry)
                except Exception:
                    # Socket is gone; stop pushing (remaining entries are
                    # discarded by the outer cleanup).
                    break
                log_queue.task_done()

        pusher_task = asyncio.create_task(log_pusher())

        # 3. Stdout interceptor for capturing prints
        # ANSI escape sequence pattern for stripping color codes
        ANSI_ESCAPE_PATTERN = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]")

        class StdoutInterceptor:
            """File-like tee: writes to the real stdout and enqueues a clean copy."""

            def __init__(self, queue, original):
                self.queue = queue
                self.original_stdout = original
                self._closed = False

            def write(self, message):
                if self._closed:
                    return
                # Write to terminal first (with ANSI codes for color).
                # Failures are ignored on purpose: reporting them would itself
                # write to stdout.
                try:
                    self.original_stdout.write(message)
                except Exception:
                    pass
                # Strip ANSI escape codes before sending to frontend
                clean_message = ANSI_ESCAPE_PATTERN.sub("", message).strip()
                # Then send to frontend (non-blocking)
                if clean_message:
                    try:
                        self.queue.put_nowait(
                            {
                                "type": "log",
                                "content": clean_message,
                                "timestamp": asyncio.get_event_loop().time(),
                            }
                        )
                    except (asyncio.QueueFull, RuntimeError):
                        # Best effort: drop the line rather than break the writer.
                        pass

            def flush(self):
                if not self._closed:
                    try:
                        self.original_stdout.flush()
                    except Exception:
                        pass

            def close(self):
                """Mark interceptor as closed to prevent further writes."""
                self._closed = True

        interceptor = StdoutInterceptor(log_queue, original_stdout)
        sys.stdout = interceptor

        try:
            await websocket.send_json(
                {"type": "status", "stage": "init", "content": "Initializing..."}
            )

            pdf_path = None
            paper_dir = None

            # Handle PDF upload mode
            if mode == "upload":
                pdf_data = data.get("pdf_data")
                pdf_name = data.get("pdf_name", "exam.pdf")

                if not pdf_data:
                    await websocket.send_json(
                        {"type": "error", "content": "PDF data is required for upload mode"}
                    )
                    return

                # Decode PDF data first to check size
                try:
                    pdf_bytes = base64.b64decode(pdf_data)
                except Exception as e:
                    await websocket.send_json(
                        {"type": "error", "content": f"Invalid base64 PDF data: {e}"}
                    )
                    return

                # Pre-validate filename and file size before writing
                try:
                    safe_name = DocumentValidator.validate_upload_safety(
                        pdf_name, len(pdf_bytes), {".pdf"}
                    )
                except ValueError as e:
                    await websocket.send_json({"type": "error", "content": str(e)})
                    return

                # Create batch directory for this mimic session
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                pdf_stem = Path(safe_name).stem
                batch_dir = MIMIC_OUTPUT_DIR / f"mimic_{timestamp}_{pdf_stem}"
                batch_dir.mkdir(parents=True, exist_ok=True)

                # Save uploaded PDF in batch directory
                pdf_path = batch_dir / safe_name

                await websocket.send_json(
                    {"type": "status", "stage": "upload", "content": f"Saving PDF: {safe_name}"}
                )

                # Write the validated PDF bytes
                with open(pdf_path, "wb") as f:
                    f.write(pdf_bytes)

                # Additional validation (file readability, etc.)
                try:
                    DocumentValidator.validate_file(pdf_path)
                except (ValueError, FileNotFoundError, PermissionError) as e:
                    # Clean up invalid or inaccessible file
                    pdf_path.unlink(missing_ok=True)
                    # Also drop the batch directory created above so rejected
                    # uploads do not leave empty folders behind. rmdir only
                    # succeeds on an empty directory, so nothing else is lost.
                    try:
                        batch_dir.rmdir()
                    except OSError as cleanup_error:
                        logger.debug(
                            f"Could not remove batch directory {batch_dir}: {cleanup_error}"
                        )
                    await websocket.send_json({"type": "error", "content": str(e)})
                    return

                await websocket.send_json(
                    {
                        "type": "status",
                        "stage": "parsing",
                        "content": "Parsing PDF exam paper (MinerU)...",
                    }
                )
                logger.info(f"Saved and validated uploaded PDF to: {pdf_path}")

                # Pass batch_dir as output directory
                pdf_path = str(pdf_path)
                output_dir = str(batch_dir)

            elif mode == "parsed":
                paper_path = data.get("paper_path")
                if not paper_path:
                    await websocket.send_json(
                        {"type": "error", "content": "paper_path is required for parsed mode"}
                    )
                    return
                paper_dir = paper_path

                # Create batch directory for parsed mode too
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                batch_dir = MIMIC_OUTPUT_DIR / f"mimic_{timestamp}_{Path(paper_path).name}"
                batch_dir.mkdir(parents=True, exist_ok=True)
                output_dir = str(batch_dir)

            else:
                await websocket.send_json({"type": "error", "content": f"Unknown mode: {mode}"})
                return

            # Create WebSocket callback for real-time progress updates
            async def ws_callback(event_type: str, data: dict):
                """Send progress updates to the frontend via WebSocket."""
                try:
                    message = {"type": event_type, **data}
                    await websocket.send_json(message)
                except Exception as e:
                    logger.debug(f"WebSocket send failed: {e}")

            # Run the complete mimic workflow with callback
            await websocket.send_json(
                {
                    "type": "status",
                    "stage": "processing",
                    "content": "Executing question generation workflow...",
                }
            )

            result = await mimic_exam_questions(
                pdf_path=pdf_path,
                paper_dir=paper_dir,
                kb_name=kb_name,
                output_dir=output_dir,
                max_questions=max_questions,
                ws_callback=ws_callback,
            )

            if result.get("success"):
                # Results are already sent via ws_callback during generation
                # Just send the final complete signal
                total_ref = result.get("total_reference_questions", 0)
                generated = result.get("generated_questions", [])
                failed = result.get("failed_questions", [])

                logger.success(
                    f"Mimic generation complete: {len(generated)} succeeded, {len(failed)} failed"
                )

                try:
                    await websocket.send_json({"type": "complete"})
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed before complete signal could be sent")
            else:
                error_msg = result.get("error", "Unknown error")
                try:
                    await websocket.send_json({"type": "error", "content": error_msg})
                except (RuntimeError, WebSocketDisconnect):
                    pass
                logger.error(f"Mimic generation failed: {error_msg}")

        finally:
            # Close interceptor and restore stdout
            interceptor.close()
            sys.stdout = original_stdout

    except WebSocketDisconnect:
        logger.debug("Client disconnected during mimic generation")
    except Exception as e:
        logger.exception("Mimic generation error")
        error_msg = format_exception_message(e)
        try:
            await websocket.send_json({"type": "error", "content": error_msg})
        except Exception:
            # The socket may already be unusable; the error is logged above.
            pass
    finally:
        # Ensure stdout is always restored
        sys.stdout = original_stdout

        # Clean up pusher task
        if pusher_task:
            try:
                pusher_task.cancel()
                await pusher_task
            except asyncio.CancelledError:
                pass  # Expected when cancelling
            except Exception as cleanup_error:
                logger.debug(f"Log pusher ended with error: {cleanup_error}")

        # Drain any remaining items in the queue
        if log_queue is not None:
            while not log_queue.empty():
                try:
                    log_queue.get_nowait()
                except asyncio.QueueEmpty:
                    break

        # Close WebSocket
        try:
            await websocket.close()
        except Exception:
            # Already closed (by the client or by an earlier failure).
            pass
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
@router.websocket("/generate")
async def websocket_question_generate(websocket: WebSocket):
    """
    WebSocket endpoint for custom-mode question generation.

    The first client message is a JSON object read for:
        "requirement": object describing the question (required; its
                       "knowledge_point" and "question_type" keys are used
                       for logging and the history title)
        "kb_name":     knowledge base name (default "ai_textbook")
        "count":       number of questions to generate (default 1)

    Messages sent back are JSON objects with a "type" field: "task_id",
    "status" ("started"), whatever the coordinator and the intercepted logger
    enqueue, "token_stats", "batch_summary", and finally "complete" or
    "error". Each generated result is also stored through ``history_manager``.
    """
    await websocket.accept()

    # Get task ID manager
    task_manager = TaskIDManager.get_instance()

    try:
        # 1. Wait for config
        data = await websocket.receive_json()
        requirement = data.get("requirement")
        kb_name = data.get("kb_name", "ai_textbook")
        count = data.get("count", 1)

        if not requirement:
            try:
                await websocket.send_json({"type": "error", "content": "Requirement is required"})
            except (RuntimeError, WebSocketDisconnect):
                pass
            return

        # The code below calls requirement.get(...); reject other JSON types
        # with a clear message instead of failing later with AttributeError.
        if not isinstance(requirement, dict):
            try:
                await websocket.send_json(
                    {"type": "error", "content": "Requirement must be a JSON object"}
                )
            except (RuntimeError, WebSocketDisconnect):
                pass
            return

        # Generate task ID
        task_key = f"question_{kb_name}_{hash(str(requirement))}"
        task_id = task_manager.generate_task_id("question_gen", task_key)

        # Send task ID to frontend
        try:
            await websocket.send_json({"type": "task_id", "task_id": task_id})
        except (RuntimeError, WebSocketDisconnect):
            logger.debug("WebSocket closed, cannot send task_id")
            return

        logger.info(
            f"[{task_id}] Starting question generation: {requirement.get('knowledge_point', 'Unknown')}"
        )

        # 2. Initialize Coordinator
        # Define unified output directory (DeepTutor/data/user/question)
        root_dir = Path(__file__).parent.parent.parent.parent
        output_base = root_dir / "data" / "user" / "question"

        # LLM settings are optional here: on any failure the coordinator is
        # built with None values.
        try:
            llm_config = get_llm_config()
            api_key = llm_config.api_key
            base_url = llm_config.base_url
            api_version = getattr(llm_config, "api_version", None)
        except Exception:
            api_key = None
            base_url = None
            api_version = None

        coordinator = AgentCoordinator(
            api_key=api_key,
            base_url=base_url,
            api_version=api_version,
            kb_name=kb_name,
            max_rounds=10,
            output_dir=str(output_base),
        )

        # 3. Setup Log Queue for WebSocket streaming
        log_queue = asyncio.Queue()

        # WebSocket callback for coordinator to send structured updates
        async def ws_callback(data: dict):
            try:
                await log_queue.put(data)
            except Exception:
                pass

        coordinator.set_ws_callback(ws_callback)

        # 4. Define background pusher for logs
        async def log_pusher():
            """Forward queued entries to the client until a send fails."""
            while True:
                entry = await log_queue.get()
                try:
                    await websocket.send_json(entry)
                except Exception:
                    break
                log_queue.task_done()

        pusher_task = asyncio.create_task(log_pusher())

        # 5. Setup LogInterceptor for capturing logger output (same as solve.py)
        # Get the coordinator's logger to intercept
        target_logger = coordinator.logger.logger
        interceptor = LogInterceptor(target_logger, log_queue)

        # 6. Run Generation with LogInterceptor
        try:
            with interceptor:
                try:
                    await websocket.send_json({"type": "status", "content": "started"})
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed, stopping question generation")
                    return

                # Use custom mode generation (new streamlined flow)
                logger.info(f"Starting custom mode generation for {count} question(s)")

                # Use the new custom generation method
                batch_result = await coordinator.generate_questions_custom(
                    requirement=requirement,
                    num_questions=count,
                )

                # Results are already sent via WebSocket callbacks in the coordinator
                # Just need to save to history for successful results
                for result in batch_result.get("results", []):
                    # Save to history
                    history_manager.add_entry(
                        activity_type=ActivityType.QUESTION,
                        title=f"{requirement.get('knowledge_point', 'Question')} ({requirement.get('question_type')})",
                        content={
                            "requirement": requirement,
                            "question": result.get("question", {}),
                            "validation": result.get("validation", {}),
                            "kb_name": kb_name,
                        },
                        summary=result.get("question", {}).get("question", "")[:100],
                    )

                # Send final token stats
                try:
                    await websocket.send_json(
                        {"type": "token_stats", "stats": coordinator.token_stats}
                    )
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed, stopping question generation")

                # Send batch summary
                try:
                    await websocket.send_json(
                        {
                            "type": "batch_summary",
                            "requested": batch_result.get("requested", count),
                            "completed": batch_result.get("completed", 0),
                            "failed": batch_result.get("failed", 0),
                            "plan": batch_result.get("plan", {}),
                        }
                    )
                except (RuntimeError, WebSocketDisconnect):
                    pass

                if not batch_result.get("success"):
                    logger.warning(
                        f"Question generation had failures: {batch_result.get('failed', 0)} failed"
                    )

                # Wait for any pending messages in the queue to be sent
                # Give the pusher a moment to process remaining messages
                await asyncio.sleep(0.1)
                # Stop waiting once the pusher has exited (it quits after a
                # failed send); nothing would drain the queue any more and
                # the loop would otherwise never terminate.
                while not log_queue.empty() and not pusher_task.done():
                    await asyncio.sleep(0.05)

                # Send complete signal
                try:
                    await websocket.send_json({"type": "complete"})
                    logger.info(f"[{task_id}] Question generation completed")
                    task_manager.update_task_status(task_id, "completed")
                except (RuntimeError, WebSocketDisconnect):
                    logger.debug("WebSocket closed, cannot send complete signal")

        except Exception as e:
            error_msg = format_exception_message(e)
            error_traceback = traceback.format_exc()
            logger.error(f"Question generation error: {error_msg}")
            logger.error(f"Error traceback:\n{error_traceback}")

            # Log additional context if available
            try:
                if "result" in locals():
                    logger.error(
                        f"Result type: {type(result)}, result keys: {result.keys() if isinstance(result, dict) else 'N/A'}"
                    )
                    if isinstance(result, dict) and "validation" in result:
                        validation = result["validation"]
                        logger.error(f"Validation type: {type(validation)}")
                        if isinstance(validation, dict):
                            logger.error(f"Validation keys: {validation.keys()}")
                            logger.error(
                                f"Issues type: {type(validation.get('issues'))}, value: {validation.get('issues')}"
                            )
                            logger.error(
                                f"Suggestions type: {type(validation.get('suggestions'))}, value: {validation.get('suggestions')}"
                            )
            except Exception as context_error:
                logger.warning(f"Failed to log error context: {context_error}")

            try:
                await websocket.send_json({"type": "error", "content": error_msg})
            except (RuntimeError, WebSocketDisconnect):
                logger.debug("WebSocket closed, cannot send error message")
            task_manager.update_task_status(task_id, "error", error=error_msg)

        finally:
            pusher_task.cancel()
            try:
                await pusher_task
            except asyncio.CancelledError:
                pass
            # Closing an already-closed socket raises; that is not an error
            # worth surfacing from cleanup.
            try:
                await websocket.close()
            except (RuntimeError, WebSocketDisconnect):
                logger.debug("WebSocket already closed during cleanup")

    except WebSocketDisconnect:
        logger.debug("Client disconnected")
    except Exception as e:
        error_msg = format_exception_message(e)
        logger.error(f"WebSocket error: {error_msg}")
|