realtimex_deeptutor-0.5.0.post1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,1309 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
ResearchPipeline 2.0 - Research workflow based on dynamic topic queue
|
|
5
|
+
Coordinates three stages: Planning -> Researching -> Reporting
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import sys
|
|
13
|
+
from typing import Any, Callable
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _get_project_root() -> Path:
|
|
17
|
+
"""
|
|
18
|
+
Get project root directory robustly by looking for marker files.
|
|
19
|
+
Works regardless of how the script is invoked.
|
|
20
|
+
"""
|
|
21
|
+
# Start from current file's directory
|
|
22
|
+
current = Path(__file__).resolve().parent
|
|
23
|
+
|
|
24
|
+
# Walk up looking for project markers (pyproject.toml, requirements.txt, or src/ directory)
|
|
25
|
+
markers = ["pyproject.toml", "requirements.txt", ".git"]
|
|
26
|
+
|
|
27
|
+
for _ in range(10): # Limit to 10 levels up
|
|
28
|
+
for marker in markers:
|
|
29
|
+
if (current / marker).exists():
|
|
30
|
+
return current
|
|
31
|
+
parent = current.parent
|
|
32
|
+
if parent == current: # Reached filesystem root
|
|
33
|
+
break
|
|
34
|
+
current = parent
|
|
35
|
+
|
|
36
|
+
# Fallback: use relative path from this file
|
|
37
|
+
# This file is at: src/agents/research/research_pipeline.py
|
|
38
|
+
# So project root is: ../../../
|
|
39
|
+
return Path(__file__).resolve().parent.parent.parent.parent
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Get project root
|
|
43
|
+
PROJECT_ROOT = _get_project_root()
|
|
44
|
+
|
|
45
|
+
# Add project root to path for imports
|
|
46
|
+
sys.path.insert(0, str(PROJECT_ROOT))
|
|
47
|
+
|
|
48
|
+
from src.agents.research.agents import (
|
|
49
|
+
DecomposeAgent,
|
|
50
|
+
ManagerAgent,
|
|
51
|
+
NoteAgent,
|
|
52
|
+
RephraseAgent,
|
|
53
|
+
ReportingAgent,
|
|
54
|
+
ResearchAgent,
|
|
55
|
+
)
|
|
56
|
+
from src.agents.research.data_structures import DynamicTopicQueue
|
|
57
|
+
from src.agents.research.utils.citation_manager import CitationManager
|
|
58
|
+
from src.logging import get_logger
|
|
59
|
+
from src.tools.code_executor import run_code
|
|
60
|
+
from src.tools.paper_search_tool import PaperSearchTool
|
|
61
|
+
from src.tools.query_item_tool import query_numbered_item
|
|
62
|
+
from src.tools.rag_tool import rag_search
|
|
63
|
+
from src.tools.web_search import web_search
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class ResearchPipeline:
|
|
67
|
+
"""DR-in-KG 2.0 Research workflow"""
|
|
68
|
+
|
|
69
|
+
def __init__(
|
|
70
|
+
self,
|
|
71
|
+
config: dict[str, Any],
|
|
72
|
+
api_key: str,
|
|
73
|
+
base_url: str,
|
|
74
|
+
api_version: str | None = None,
|
|
75
|
+
research_id: str | None = None,
|
|
76
|
+
kb_name: str | None = None,
|
|
77
|
+
progress_callback: Callable | None = None,
|
|
78
|
+
):
|
|
79
|
+
"""
|
|
80
|
+
Initialize research workflow
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
config: Configuration dictionary
|
|
84
|
+
api_key: API key
|
|
85
|
+
base_url: API endpoint
|
|
86
|
+
api_version: API version (for Azure OpenAI)
|
|
87
|
+
research_id: Research task ID (optional)
|
|
88
|
+
kb_name: Knowledge base name (optional, if provided overrides config file setting)
|
|
89
|
+
progress_callback: Progress callback function (optional), signature: callback(event: Dict[str, Any])
|
|
90
|
+
"""
|
|
91
|
+
self.config = config
|
|
92
|
+
self.progress_callback = progress_callback
|
|
93
|
+
|
|
94
|
+
# If kb_name is provided, override config
|
|
95
|
+
if kb_name is not None:
|
|
96
|
+
if "rag" not in self.config:
|
|
97
|
+
self.config["rag"] = {}
|
|
98
|
+
self.config["rag"]["kb_name"] = kb_name
|
|
99
|
+
self.api_key = api_key
|
|
100
|
+
self.base_url = base_url
|
|
101
|
+
self.api_version = api_version or config.get("llm", {}).get("api_version")
|
|
102
|
+
self.input_topic: str | None = None
|
|
103
|
+
self.optimized_topic: str | None = None
|
|
104
|
+
|
|
105
|
+
# Generate research ID
|
|
106
|
+
if research_id is None:
|
|
107
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
108
|
+
self.research_id = f"research_{timestamp}"
|
|
109
|
+
else:
|
|
110
|
+
self.research_id = research_id
|
|
111
|
+
|
|
112
|
+
# Set directories
|
|
113
|
+
system_config = config.get("system", {})
|
|
114
|
+
self.cache_dir = Path(system_config.get("output_base_dir", "./cache")) / self.research_id
|
|
115
|
+
self.reports_dir = Path(system_config.get("reports_dir", "./reports"))
|
|
116
|
+
|
|
117
|
+
# Create directories
|
|
118
|
+
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
119
|
+
self.reports_dir.mkdir(parents=True, exist_ok=True)
|
|
120
|
+
self.plan_progress_file = self.cache_dir / "planning_progress.json"
|
|
121
|
+
self.report_progress_file = self.cache_dir / "reporting_progress.json"
|
|
122
|
+
self.queue_progress_file = self.cache_dir / "queue_progress.json"
|
|
123
|
+
self._stage_events: dict[str, list[dict[str, Any]]] = {
|
|
124
|
+
"planning": [],
|
|
125
|
+
"reporting": [],
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
# Initialize queue
|
|
129
|
+
queue_cfg = config.get("queue", {})
|
|
130
|
+
self.queue = DynamicTopicQueue(
|
|
131
|
+
self.research_id,
|
|
132
|
+
max_length=queue_cfg.get("max_length"),
|
|
133
|
+
state_file=str(self.queue_progress_file),
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# Initialize unified logging system (must be before _init_agents)
|
|
137
|
+
self._init_logger()
|
|
138
|
+
|
|
139
|
+
# Initialize Agents
|
|
140
|
+
self.agents = {}
|
|
141
|
+
self._init_agents()
|
|
142
|
+
|
|
143
|
+
# Tool instances
|
|
144
|
+
self._paper_tool: PaperSearchTool | None = None
|
|
145
|
+
|
|
146
|
+
# Citation manager
|
|
147
|
+
self.citation_manager = CitationManager(self.research_id, self.cache_dir)
|
|
148
|
+
|
|
149
|
+
# Lock for thread-safe progress file writing in parallel mode
|
|
150
|
+
import threading
|
|
151
|
+
|
|
152
|
+
self._progress_file_lock = threading.Lock()
|
|
153
|
+
|
|
154
|
+
def _init_logger(self):
|
|
155
|
+
"""Initialize unified logging system"""
|
|
156
|
+
# Get log_dir from config paths (user_log_dir from main.yaml)
|
|
157
|
+
log_dir = self.config.get("paths", {}).get("user_log_dir") or self.config.get(
|
|
158
|
+
"logging", {}
|
|
159
|
+
).get("log_dir")
|
|
160
|
+
|
|
161
|
+
self.logger = get_logger(name="Research", log_dir=log_dir)
|
|
162
|
+
self.logger.success("Logger initialized")
|
|
163
|
+
|
|
164
|
+
def _init_agents(self):
|
|
165
|
+
"""Initialize all Agents"""
|
|
166
|
+
if self.logger:
|
|
167
|
+
self.logger.info("Initializing Agents...")
|
|
168
|
+
|
|
169
|
+
self.agents = {
|
|
170
|
+
"rephrase": RephraseAgent(
|
|
171
|
+
self.config, self.api_key, self.base_url, api_version=self.api_version
|
|
172
|
+
),
|
|
173
|
+
"decompose": DecomposeAgent(
|
|
174
|
+
self.config, self.api_key, self.base_url, api_version=self.api_version
|
|
175
|
+
),
|
|
176
|
+
"manager": ManagerAgent(
|
|
177
|
+
self.config, self.api_key, self.base_url, api_version=self.api_version
|
|
178
|
+
),
|
|
179
|
+
"research": ResearchAgent(
|
|
180
|
+
self.config, self.api_key, self.base_url, api_version=self.api_version
|
|
181
|
+
),
|
|
182
|
+
"note": NoteAgent(
|
|
183
|
+
self.config, self.api_key, self.base_url, api_version=self.api_version
|
|
184
|
+
),
|
|
185
|
+
"reporting": ReportingAgent(
|
|
186
|
+
self.config, self.api_key, self.base_url, api_version=self.api_version
|
|
187
|
+
),
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
# Set Manager's queue
|
|
191
|
+
self.agents["manager"].set_queue(self.queue)
|
|
192
|
+
|
|
193
|
+
if self.logger:
|
|
194
|
+
self.logger.success(f"Initialized {len(self.agents)} Agents")
|
|
195
|
+
|
|
196
|
+
async def _call_tool_with_timeout(
|
|
197
|
+
self, coro, timeout: float = 60.0, tool_name: str = "tool"
|
|
198
|
+
) -> Any:
|
|
199
|
+
"""
|
|
200
|
+
Execute a coroutine with timeout support
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
coro: Coroutine to execute
|
|
204
|
+
timeout: Timeout in seconds (default 60s)
|
|
205
|
+
tool_name: Name of the tool for logging
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
Result of the coroutine
|
|
209
|
+
|
|
210
|
+
Raises:
|
|
211
|
+
asyncio.TimeoutError: If timeout exceeded
|
|
212
|
+
"""
|
|
213
|
+
try:
|
|
214
|
+
return await asyncio.wait_for(coro, timeout=timeout)
|
|
215
|
+
except asyncio.TimeoutError:
|
|
216
|
+
self.logger.warning(f"Tool {tool_name} timed out after {timeout}s")
|
|
217
|
+
raise
|
|
218
|
+
|
|
219
|
+
async def _call_tool_with_retry(
|
|
220
|
+
self,
|
|
221
|
+
tool_func,
|
|
222
|
+
*args,
|
|
223
|
+
max_retries: int = 2,
|
|
224
|
+
timeout: float = 60.0,
|
|
225
|
+
tool_name: str = "tool",
|
|
226
|
+
**kwargs,
|
|
227
|
+
) -> Any:
|
|
228
|
+
"""
|
|
229
|
+
Call a tool function with retry and timeout support
|
|
230
|
+
|
|
231
|
+
Args:
|
|
232
|
+
tool_func: Tool function to call
|
|
233
|
+
*args: Positional arguments for the function
|
|
234
|
+
max_retries: Maximum number of retries (default 2)
|
|
235
|
+
timeout: Timeout per attempt in seconds (default 60s)
|
|
236
|
+
tool_name: Name of the tool for logging
|
|
237
|
+
**kwargs: Keyword arguments for the function
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
Result of the tool function
|
|
241
|
+
"""
|
|
242
|
+
last_error = None
|
|
243
|
+
|
|
244
|
+
for attempt in range(max_retries + 1):
|
|
245
|
+
try:
|
|
246
|
+
if asyncio.iscoroutinefunction(tool_func):
|
|
247
|
+
result = await self._call_tool_with_timeout(
|
|
248
|
+
tool_func(*args, **kwargs), timeout=timeout, tool_name=tool_name
|
|
249
|
+
)
|
|
250
|
+
else:
|
|
251
|
+
# For sync functions, run in executor
|
|
252
|
+
import functools
|
|
253
|
+
|
|
254
|
+
loop = asyncio.get_event_loop()
|
|
255
|
+
result = await asyncio.wait_for(
|
|
256
|
+
loop.run_in_executor(None, functools.partial(tool_func, *args, **kwargs)),
|
|
257
|
+
timeout=timeout,
|
|
258
|
+
)
|
|
259
|
+
return result
|
|
260
|
+
except asyncio.TimeoutError as e:
|
|
261
|
+
last_error = e
|
|
262
|
+
if attempt < max_retries:
|
|
263
|
+
self.logger.warning(
|
|
264
|
+
f"Tool {tool_name} attempt {attempt + 1} timed out, retrying..."
|
|
265
|
+
)
|
|
266
|
+
await asyncio.sleep(1) # Brief pause before retry
|
|
267
|
+
except Exception as e:
|
|
268
|
+
last_error = e
|
|
269
|
+
if attempt < max_retries:
|
|
270
|
+
self.logger.warning(
|
|
271
|
+
f"Tool {tool_name} attempt {attempt + 1} failed: {e}, retrying..."
|
|
272
|
+
)
|
|
273
|
+
await asyncio.sleep(1)
|
|
274
|
+
|
|
275
|
+
# All retries exhausted
|
|
276
|
+
self.logger.error(f"Tool {tool_name} failed after {max_retries + 1} attempts: {last_error}")
|
|
277
|
+
raise last_error if last_error else RuntimeError(f"{tool_name} failed")
|
|
278
|
+
|
|
279
|
+
async def _call_tool(self, tool_type: str, query: str) -> str:
|
|
280
|
+
"""Call tool and return raw string answer (JSON string or text)"""
|
|
281
|
+
tool_type = (tool_type or "").lower()
|
|
282
|
+
|
|
283
|
+
# Get timeout and retry settings from config
|
|
284
|
+
tool_config = self.config.get("researching", {})
|
|
285
|
+
default_timeout = tool_config.get("tool_timeout", 60)
|
|
286
|
+
max_retries = tool_config.get("tool_max_retries", 2)
|
|
287
|
+
|
|
288
|
+
try:
|
|
289
|
+
if tool_type in ("rag_hybrid", "rag_naive", "rag"):
|
|
290
|
+
rag_cfg = self.config.get("rag", {})
|
|
291
|
+
kb_name = rag_cfg.get("kb_name", "DE-all")
|
|
292
|
+
default_mode = rag_cfg.get("default_mode", "hybrid")
|
|
293
|
+
fallback_mode = rag_cfg.get("fallback_mode", "naive")
|
|
294
|
+
if tool_type == "rag_hybrid":
|
|
295
|
+
mode = "hybrid"
|
|
296
|
+
elif tool_type == "rag_naive":
|
|
297
|
+
mode = "naive"
|
|
298
|
+
else:
|
|
299
|
+
mode = default_mode
|
|
300
|
+
try:
|
|
301
|
+
res = await self._call_tool_with_retry(
|
|
302
|
+
rag_search,
|
|
303
|
+
query=query,
|
|
304
|
+
kb_name=kb_name,
|
|
305
|
+
mode=mode,
|
|
306
|
+
max_retries=max_retries,
|
|
307
|
+
timeout=default_timeout,
|
|
308
|
+
tool_name=f"rag_search({mode})",
|
|
309
|
+
)
|
|
310
|
+
except Exception:
|
|
311
|
+
# Retry with fallback mode
|
|
312
|
+
res = await self._call_tool_with_retry(
|
|
313
|
+
rag_search,
|
|
314
|
+
query=query,
|
|
315
|
+
kb_name=kb_name,
|
|
316
|
+
mode=fallback_mode,
|
|
317
|
+
max_retries=1,
|
|
318
|
+
timeout=default_timeout,
|
|
319
|
+
tool_name=f"rag_search({fallback_mode})",
|
|
320
|
+
)
|
|
321
|
+
return json.dumps(res, ensure_ascii=False)
|
|
322
|
+
|
|
323
|
+
if tool_type == "web_search":
|
|
324
|
+
res = await self._call_tool_with_retry(
|
|
325
|
+
web_search,
|
|
326
|
+
query=query,
|
|
327
|
+
output_dir=str(self.cache_dir),
|
|
328
|
+
max_retries=max_retries,
|
|
329
|
+
timeout=default_timeout,
|
|
330
|
+
tool_name="web_search",
|
|
331
|
+
)
|
|
332
|
+
return json.dumps(res, ensure_ascii=False)
|
|
333
|
+
|
|
334
|
+
if tool_type == "query_item":
|
|
335
|
+
kb_name = self.config.get("rag", {}).get("kb_name", "ai_textbook")
|
|
336
|
+
res = await self._call_tool_with_retry(
|
|
337
|
+
query_numbered_item,
|
|
338
|
+
identifier=query,
|
|
339
|
+
kb_name=kb_name,
|
|
340
|
+
max_retries=max_retries,
|
|
341
|
+
timeout=default_timeout,
|
|
342
|
+
tool_name="query_item",
|
|
343
|
+
)
|
|
344
|
+
return json.dumps(res, ensure_ascii=False)
|
|
345
|
+
|
|
346
|
+
if tool_type == "paper_search":
|
|
347
|
+
if self._paper_tool is None:
|
|
348
|
+
self._paper_tool = PaperSearchTool()
|
|
349
|
+
years_limit = self.config.get("researching", {}).get("paper_search_years_limit", 3)
|
|
350
|
+
papers = await self._call_tool_with_retry(
|
|
351
|
+
self._paper_tool.search_papers,
|
|
352
|
+
query=query,
|
|
353
|
+
max_results=3,
|
|
354
|
+
years_limit=years_limit,
|
|
355
|
+
max_retries=max_retries,
|
|
356
|
+
timeout=default_timeout,
|
|
357
|
+
tool_name="paper_search",
|
|
358
|
+
)
|
|
359
|
+
return json.dumps({"papers": papers}, ensure_ascii=False)
|
|
360
|
+
|
|
361
|
+
if tool_type == "run_code":
|
|
362
|
+
# Code execution has its own internal timeout (10s), wrapper timeout is 30s
|
|
363
|
+
result = await self._call_tool_with_retry(
|
|
364
|
+
run_code,
|
|
365
|
+
language="python",
|
|
366
|
+
code=query,
|
|
367
|
+
max_retries=1,
|
|
368
|
+
timeout=30, # Wrapper timeout
|
|
369
|
+
tool_name="run_code",
|
|
370
|
+
)
|
|
371
|
+
return json.dumps(result, ensure_ascii=False)
|
|
372
|
+
|
|
373
|
+
# Default fallback to RAG hybrid
|
|
374
|
+
kb_name = self.config.get("rag", {}).get("kb_name", "ai_textbook")
|
|
375
|
+
res = await self._call_tool_with_retry(
|
|
376
|
+
rag_search,
|
|
377
|
+
query=query,
|
|
378
|
+
kb_name=kb_name,
|
|
379
|
+
mode="hybrid",
|
|
380
|
+
max_retries=max_retries,
|
|
381
|
+
timeout=default_timeout,
|
|
382
|
+
tool_name="rag_search(hybrid)",
|
|
383
|
+
)
|
|
384
|
+
return json.dumps(res, ensure_ascii=False)
|
|
385
|
+
except Exception as e:
|
|
386
|
+
return json.dumps(
|
|
387
|
+
{"status": "failed", "error": str(e), "tool": tool_type, "query": query},
|
|
388
|
+
ensure_ascii=False,
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
async def run(self, topic: str) -> dict[str, Any]:
|
|
392
|
+
"""
|
|
393
|
+
Execute complete research workflow
|
|
394
|
+
|
|
395
|
+
Args:
|
|
396
|
+
topic: Research topic
|
|
397
|
+
|
|
398
|
+
Returns:
|
|
399
|
+
Research result
|
|
400
|
+
"""
|
|
401
|
+
if self.logger:
|
|
402
|
+
self.logger.section("DR-in-KG 2.0 - Deep Research System Based on Dynamic Topic Queue")
|
|
403
|
+
self.logger.info(f"Research Topic: {topic}")
|
|
404
|
+
self.logger.info(f"Research ID: {self.research_id}")
|
|
405
|
+
self.input_topic = topic
|
|
406
|
+
|
|
407
|
+
try:
|
|
408
|
+
# ========== Phase 1: Planning (Planning and Initialization) ==========
|
|
409
|
+
self.logger.info("\n" + "═" * 70)
|
|
410
|
+
self.logger.info("▶ Phase 1: Planning - Planning and Initialization")
|
|
411
|
+
self.logger.info("═" * 70)
|
|
412
|
+
|
|
413
|
+
optimized_topic = await self._phase1_planning(topic)
|
|
414
|
+
|
|
415
|
+
# ========== Phase 2: Researching (Dynamic Research Loop) ==========
|
|
416
|
+
self.logger.info("\n" + "═" * 70)
|
|
417
|
+
self.logger.info("▶ Phase 2: Researching - Dynamic Research Loop")
|
|
418
|
+
self.logger.info("═" * 70)
|
|
419
|
+
|
|
420
|
+
await self._phase2_researching()
|
|
421
|
+
|
|
422
|
+
# ========== Phase 3: Reporting (Report Generation) ==========
|
|
423
|
+
self.logger.info("\n" + "═" * 70)
|
|
424
|
+
self.logger.info("▶ Phase 3: Reporting - Report Generation")
|
|
425
|
+
self.logger.info("═" * 70)
|
|
426
|
+
|
|
427
|
+
report_result = await self._phase3_reporting(optimized_topic)
|
|
428
|
+
|
|
429
|
+
# ========== Save Results ==========
|
|
430
|
+
self.logger.info("\n" + "═" * 70)
|
|
431
|
+
self.logger.info("▶ Save Results")
|
|
432
|
+
self.logger.info("═" * 70 + "\n")
|
|
433
|
+
|
|
434
|
+
report_file = self.reports_dir / f"{self.research_id}.md"
|
|
435
|
+
with open(report_file, "w", encoding="utf-8") as f:
|
|
436
|
+
f.write(report_result["report"])
|
|
437
|
+
self.logger.success(f"Final Report: {report_file}")
|
|
438
|
+
|
|
439
|
+
# Save queue
|
|
440
|
+
queue_file = self.cache_dir / "queue.json"
|
|
441
|
+
self.queue.save_to_json(str(queue_file))
|
|
442
|
+
self.logger.success(f"Queue Data: {queue_file}")
|
|
443
|
+
|
|
444
|
+
# Save outline (if exists)
|
|
445
|
+
if "outline" in report_result:
|
|
446
|
+
outline_file = self.cache_dir / "outline.json"
|
|
447
|
+
with open(outline_file, "w", encoding="utf-8") as f:
|
|
448
|
+
json.dump(report_result["outline"], f, ensure_ascii=False, indent=2)
|
|
449
|
+
self.logger.success(f"Report Outline: {outline_file}")
|
|
450
|
+
|
|
451
|
+
# Save metadata
|
|
452
|
+
metadata = {
|
|
453
|
+
"research_id": self.research_id,
|
|
454
|
+
"topic": topic,
|
|
455
|
+
"optimized_topic": optimized_topic,
|
|
456
|
+
"statistics": self.queue.get_statistics(),
|
|
457
|
+
"report_word_count": report_result["word_count"],
|
|
458
|
+
"completed_at": datetime.now().isoformat(),
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
metadata_file = self.reports_dir / f"{self.research_id}_metadata.json"
|
|
462
|
+
with open(metadata_file, "w", encoding="utf-8") as f:
|
|
463
|
+
json.dump(metadata, f, ensure_ascii=False, indent=2)
|
|
464
|
+
self.logger.success(f"Metadata: {metadata_file}")
|
|
465
|
+
|
|
466
|
+
# ===== Token Cost Statistics =====
|
|
467
|
+
try:
|
|
468
|
+
from src.agents.research.utils.token_tracker import get_token_tracker
|
|
469
|
+
|
|
470
|
+
tracker = get_token_tracker()
|
|
471
|
+
cost_summary = tracker.format_summary()
|
|
472
|
+
self.logger.info(cost_summary)
|
|
473
|
+
cost_file = self.cache_dir / "token_cost_summary.json"
|
|
474
|
+
tracker.save(str(cost_file))
|
|
475
|
+
self.logger.success(f"Cost statistics saved: {cost_file}")
|
|
476
|
+
except Exception as _e:
|
|
477
|
+
self.logger.warning(f"Cost statistics failed: {_e}")
|
|
478
|
+
|
|
479
|
+
self.logger.info("\n" + "=" * 70)
|
|
480
|
+
self.logger.success("Research Completed!")
|
|
481
|
+
self.logger.info("=" * 70)
|
|
482
|
+
self.logger.info(f"Research ID: {self.research_id}")
|
|
483
|
+
self.logger.info(f"Topic: {topic}")
|
|
484
|
+
self.logger.info(f"Final Report: {report_file}")
|
|
485
|
+
self.logger.info(f"Report Word Count: {report_result['word_count']}")
|
|
486
|
+
self.logger.info(f"Topic Blocks: {len(self.queue.blocks)}")
|
|
487
|
+
self.logger.info("=" * 70 + "\n")
|
|
488
|
+
|
|
489
|
+
return {
|
|
490
|
+
"research_id": self.research_id,
|
|
491
|
+
"topic": topic,
|
|
492
|
+
"final_report_path": str(report_file),
|
|
493
|
+
"metadata": metadata,
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
except KeyboardInterrupt:
|
|
497
|
+
self.logger.warning("\n\n⚠️ Research interrupted by user")
|
|
498
|
+
sys.exit(0)
|
|
499
|
+
except Exception as e:
|
|
500
|
+
self.logger.error(f"\n\n✗ Research failed: {e!s}")
|
|
501
|
+
import traceback
|
|
502
|
+
|
|
503
|
+
self.logger.error(traceback.format_exc())
|
|
504
|
+
raise
|
|
505
|
+
|
|
506
|
+
async def _phase1_planning(self, topic: str) -> str:
|
|
507
|
+
"""
|
|
508
|
+
Phase 1: Planning and Initialization
|
|
509
|
+
|
|
510
|
+
Args:
|
|
511
|
+
topic: User input topic
|
|
512
|
+
|
|
513
|
+
Returns:
|
|
514
|
+
Optimized topic
|
|
515
|
+
"""
|
|
516
|
+
self._log_progress("planning", "planning_started", user_topic=topic)
|
|
517
|
+
|
|
518
|
+
# Check if topic rephrasing is enabled
|
|
519
|
+
rephrase_config = self.config.get("planning", {}).get("rephrase", {})
|
|
520
|
+
rephrase_enabled = rephrase_config.get("enabled", True)
|
|
521
|
+
|
|
522
|
+
if rephrase_enabled:
|
|
523
|
+
self.logger.info("\n【Step 1】Topic Rephrasing...")
|
|
524
|
+
|
|
525
|
+
# Use RephraseAgent to optimize topic (supports user interaction)
|
|
526
|
+
max_iterations = rephrase_config.get("max_iterations", 3)
|
|
527
|
+
|
|
528
|
+
rephrase_result = None
|
|
529
|
+
iteration = 0
|
|
530
|
+
user_feedback = None # Initialize user feedback variable
|
|
531
|
+
|
|
532
|
+
# Check if running in frontend mode (has progress_callback)
|
|
533
|
+
# In frontend mode, skip interactive input loop - user controls via frontend UI
|
|
534
|
+
is_frontend_mode = self.progress_callback is not None
|
|
535
|
+
|
|
536
|
+
while iteration < max_iterations:
|
|
537
|
+
# Execute rephrasing
|
|
538
|
+
if iteration == 0:
|
|
539
|
+
rephrase_result = await self.agents["rephrase"].process(
|
|
540
|
+
topic, iteration=iteration
|
|
541
|
+
)
|
|
542
|
+
# Continue rephrasing based on user feedback
|
|
543
|
+
elif user_feedback:
|
|
544
|
+
rephrase_result = await self.agents["rephrase"].process(
|
|
545
|
+
user_feedback, iteration=iteration, previous_result=rephrase_result
|
|
546
|
+
)
|
|
547
|
+
else:
|
|
548
|
+
# If no feedback, use previous result
|
|
549
|
+
break
|
|
550
|
+
|
|
551
|
+
iteration += 1
|
|
552
|
+
|
|
553
|
+
# In frontend mode, only do one iteration and exit
|
|
554
|
+
# User will control further iterations via frontend UI (/optimize_topic API)
|
|
555
|
+
if is_frontend_mode:
|
|
556
|
+
self.logger.info(f"\n{'=' * 70}")
|
|
557
|
+
self.logger.info("📋 Rephrase Result (Frontend Mode):")
|
|
558
|
+
self.logger.info(f"{'=' * 70}")
|
|
559
|
+
self.logger.info(
|
|
560
|
+
f"Optimized Research Topic: {rephrase_result.get('topic', '')}"
|
|
561
|
+
)
|
|
562
|
+
self.logger.info(f"{'=' * 70}")
|
|
563
|
+
self.logger.success(
|
|
564
|
+
"Frontend mode: Using current result, proceeding to next stage"
|
|
565
|
+
)
|
|
566
|
+
break
|
|
567
|
+
|
|
568
|
+
# CLI mode: Ask user opinion (unless max iterations reached)
|
|
569
|
+
if iteration < max_iterations:
|
|
570
|
+
self.logger.info(f"\n{'=' * 70}")
|
|
571
|
+
self.logger.info("📋 Current Rephrasing Result:")
|
|
572
|
+
self.logger.info(f"{'=' * 70}")
|
|
573
|
+
self.logger.info(
|
|
574
|
+
f"Optimized Research Topic: {rephrase_result.get('topic', '')}"
|
|
575
|
+
)
|
|
576
|
+
self.logger.info(f"{'=' * 70}")
|
|
577
|
+
self.logger.info("\n💬 Are you satisfied with this rephrasing result?")
|
|
578
|
+
self.logger.info(
|
|
579
|
+
" - Enter 'satisfied', 'ok', etc. to indicate satisfaction, will proceed to next stage"
|
|
580
|
+
)
|
|
581
|
+
self.logger.info(
|
|
582
|
+
" - Enter specific modification suggestions, will continue optimizing based on your feedback"
|
|
583
|
+
)
|
|
584
|
+
self.logger.info(" - Press Enter directly to use current result")
|
|
585
|
+
|
|
586
|
+
user_input = input("\nYour choice: ").strip()
|
|
587
|
+
|
|
588
|
+
if not user_input:
|
|
589
|
+
self.logger.success("Using current result, proceeding to next stage")
|
|
590
|
+
break
|
|
591
|
+
|
|
592
|
+
# Determine user intent
|
|
593
|
+
satisfaction = await self.agents["rephrase"].check_user_satisfaction(
|
|
594
|
+
rephrase_result, user_input
|
|
595
|
+
)
|
|
596
|
+
|
|
597
|
+
if satisfaction.get("user_satisfied", False):
|
|
598
|
+
self.logger.success("User satisfied, proceeding to next stage")
|
|
599
|
+
break
|
|
600
|
+
|
|
601
|
+
if not satisfaction.get("should_continue", True):
|
|
602
|
+
self.logger.success("Proceeding to next stage")
|
|
603
|
+
break
|
|
604
|
+
|
|
605
|
+
# Continue iteration, use user input as feedback
|
|
606
|
+
user_feedback = user_input
|
|
607
|
+
|
|
608
|
+
# Ensure there is a result
|
|
609
|
+
if rephrase_result is None:
|
|
610
|
+
rephrase_result = {"topic": topic}
|
|
611
|
+
|
|
612
|
+
optimized_topic = rephrase_result.get("topic", topic)
|
|
613
|
+
self._log_progress(
|
|
614
|
+
"planning",
|
|
615
|
+
"rephrase_completed",
|
|
616
|
+
optimized_topic=optimized_topic,
|
|
617
|
+
iterations=iteration,
|
|
618
|
+
)
|
|
619
|
+
else:
|
|
620
|
+
self.logger.info("\n【Step 1】Topic Rephrasing (disabled, skipping)...")
|
|
621
|
+
optimized_topic = topic
|
|
622
|
+
self._log_progress(
|
|
623
|
+
"planning",
|
|
624
|
+
"rephrase_skipped",
|
|
625
|
+
optimized_topic=optimized_topic,
|
|
626
|
+
reason="rephrase feature disabled",
|
|
627
|
+
)
|
|
628
|
+
|
|
629
|
+
self.optimized_topic = optimized_topic
|
|
630
|
+
|
|
631
|
+
self.logger.info("\n【Step 2】Topic Decomposition...")
|
|
632
|
+
|
|
633
|
+
# Use DecomposeAgent to decompose topic
|
|
634
|
+
decompose_config = self.config.get("planning", {}).get("decompose", {})
|
|
635
|
+
mode = decompose_config.get("mode", "manual")
|
|
636
|
+
|
|
637
|
+
if mode == "auto":
|
|
638
|
+
# Auto mode: use auto_max_subtopics as limit
|
|
639
|
+
num_subtopics = decompose_config.get(
|
|
640
|
+
"auto_max_subtopics", decompose_config.get("initial_subtopics", 5)
|
|
641
|
+
)
|
|
642
|
+
self.logger.info(f"📌 Using Auto mode, max subtopics: {num_subtopics}")
|
|
643
|
+
else:
|
|
644
|
+
# Manual mode: use initial_subtopics
|
|
645
|
+
num_subtopics = decompose_config.get("initial_subtopics", 5)
|
|
646
|
+
self.logger.info(f"📌 Using Manual mode, expected subtopics: {num_subtopics}")
|
|
647
|
+
|
|
648
|
+
self._log_progress(
|
|
649
|
+
"planning", "decompose_started", requested_subtopics=num_subtopics, mode=mode
|
|
650
|
+
)
|
|
651
|
+
|
|
652
|
+
# Set citation manager to DecomposeAgent
|
|
653
|
+
self.agents["decompose"].set_citation_manager(self.citation_manager)
|
|
654
|
+
|
|
655
|
+
decompose_result = await self.agents["decompose"].process(
|
|
656
|
+
topic=optimized_topic, num_subtopics=num_subtopics, mode=mode
|
|
657
|
+
)
|
|
658
|
+
self._log_progress(
|
|
659
|
+
"planning",
|
|
660
|
+
"decompose_completed",
|
|
661
|
+
generated_subtopics=decompose_result.get("total_subtopics", 0),
|
|
662
|
+
rag_context_length=len(decompose_result.get("rag_context", "") or ""),
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
# Save Planning stage results (includes sub_queries / rag_context / sub_topics)
|
|
666
|
+
try:
|
|
667
|
+
step1_path = self.cache_dir / "step1_planning.json"
|
|
668
|
+
with open(step1_path, "w", encoding="utf-8") as f:
|
|
669
|
+
json.dump(
|
|
670
|
+
{
|
|
671
|
+
"main_topic": optimized_topic,
|
|
672
|
+
"sub_queries": decompose_result.get("sub_queries", []),
|
|
673
|
+
"rag_context": decompose_result.get("rag_context", ""),
|
|
674
|
+
"sub_topics": decompose_result.get("sub_topics", []),
|
|
675
|
+
"total_subtopics": decompose_result.get("total_subtopics", 0),
|
|
676
|
+
"timestamp": datetime.now().isoformat(),
|
|
677
|
+
},
|
|
678
|
+
f,
|
|
679
|
+
ensure_ascii=False,
|
|
680
|
+
indent=2,
|
|
681
|
+
)
|
|
682
|
+
self.logger.success(f"Planning stage data saved: {step1_path}")
|
|
683
|
+
except Exception as _e:
|
|
684
|
+
self.logger.warning(f"Failed to save Planning stage data: {_e}")
|
|
685
|
+
|
|
686
|
+
# Add subtopics to queue
|
|
687
|
+
self.logger.info("\n【Step 3】Initializing Queue...")
|
|
688
|
+
for sub_topic_data in decompose_result.get("sub_topics", []):
|
|
689
|
+
title = (sub_topic_data.get("title") or "").strip()
|
|
690
|
+
overview = sub_topic_data.get("overview", "")
|
|
691
|
+
if not title:
|
|
692
|
+
continue
|
|
693
|
+
try:
|
|
694
|
+
block = self.queue.add_block(sub_topic=title, overview=overview)
|
|
695
|
+
self._log_progress(
|
|
696
|
+
"planning",
|
|
697
|
+
"queue_seeded",
|
|
698
|
+
block_id=block.block_id,
|
|
699
|
+
sub_topic=block.sub_topic,
|
|
700
|
+
total_blocks=len(self.queue.blocks),
|
|
701
|
+
)
|
|
702
|
+
except RuntimeError as err:
|
|
703
|
+
self._log_progress(
|
|
704
|
+
"planning", "queue_capacity_reached", error=str(err), attempted_topic=title
|
|
705
|
+
)
|
|
706
|
+
self.logger.warning(
|
|
707
|
+
f"Queue reached capacity limit, stopping addition of initial topics: {err}"
|
|
708
|
+
)
|
|
709
|
+
break
|
|
710
|
+
|
|
711
|
+
stats = self.queue.get_statistics()
|
|
712
|
+
self._log_progress("planning", "planning_completed", total_blocks=stats["total_blocks"])
|
|
713
|
+
self.logger.success("\nPhase 1 Completed:")
|
|
714
|
+
self.logger.info(f" - Optimized Topic: {optimized_topic}")
|
|
715
|
+
self.logger.info(f" - Subtopic Count: {stats['total_blocks']}")
|
|
716
|
+
self.agents["manager"].set_primary_topic(optimized_topic)
|
|
717
|
+
|
|
718
|
+
return optimized_topic
|
|
719
|
+
|
|
720
|
+
async def _phase2_researching(self):
|
|
721
|
+
"""
|
|
722
|
+
Phase 2: Dynamic Research Loop
|
|
723
|
+
Routes to series or parallel execution based on configuration
|
|
724
|
+
"""
|
|
725
|
+
execution_mode = self.config.get("researching", {}).get("execution_mode", "series")
|
|
726
|
+
|
|
727
|
+
if execution_mode == "parallel":
|
|
728
|
+
await self._phase2_researching_parallel()
|
|
729
|
+
else:
|
|
730
|
+
await self._phase2_researching_series()
|
|
731
|
+
|
|
732
|
+
async def _phase2_researching_series(self):
|
|
733
|
+
"""
|
|
734
|
+
Phase 2: Dynamic Research Loop (Series Mode - Original Implementation)
|
|
735
|
+
"""
|
|
736
|
+
# Initialize researching stage event list
|
|
737
|
+
if "researching" not in self._stage_events:
|
|
738
|
+
self._stage_events["researching"] = []
|
|
739
|
+
|
|
740
|
+
manager = self.agents["manager"]
|
|
741
|
+
research = self.agents["research"]
|
|
742
|
+
|
|
743
|
+
total_blocks = len(self.queue.blocks)
|
|
744
|
+
completed_blocks = 0
|
|
745
|
+
|
|
746
|
+
self._log_researching_progress(
|
|
747
|
+
"researching_started", total_blocks=total_blocks, execution_mode="series"
|
|
748
|
+
)
|
|
749
|
+
|
|
750
|
+
while not manager.is_research_complete():
|
|
751
|
+
# Get next task to research
|
|
752
|
+
block = manager.get_next_task()
|
|
753
|
+
if not block:
|
|
754
|
+
break
|
|
755
|
+
|
|
756
|
+
self._log_researching_progress(
|
|
757
|
+
"block_started",
|
|
758
|
+
block_id=block.block_id,
|
|
759
|
+
sub_topic=block.sub_topic,
|
|
760
|
+
current_block=completed_blocks + 1,
|
|
761
|
+
total_blocks=total_blocks,
|
|
762
|
+
)
|
|
763
|
+
|
|
764
|
+
# Create iteration progress callback for this block
|
|
765
|
+
iteration_callback = self._create_iteration_progress_callback(
|
|
766
|
+
block_id=block.block_id,
|
|
767
|
+
sub_topic=block.sub_topic,
|
|
768
|
+
execution_mode="series",
|
|
769
|
+
current_block=completed_blocks + 1,
|
|
770
|
+
total_blocks=total_blocks,
|
|
771
|
+
)
|
|
772
|
+
|
|
773
|
+
# Execute research loop (unified handling by ResearchAgent.process)
|
|
774
|
+
result = await research.process(
|
|
775
|
+
topic_block=block,
|
|
776
|
+
call_tool_callback=self._call_tool,
|
|
777
|
+
note_agent=self.agents["note"],
|
|
778
|
+
citation_manager=self.citation_manager,
|
|
779
|
+
queue=self.queue,
|
|
780
|
+
manager_agent=manager,
|
|
781
|
+
config=self.config,
|
|
782
|
+
progress_callback=iteration_callback,
|
|
783
|
+
)
|
|
784
|
+
|
|
785
|
+
# Mark as completed
|
|
786
|
+
manager.complete_task(block.block_id)
|
|
787
|
+
completed_blocks += 1
|
|
788
|
+
|
|
789
|
+
# Update total_blocks in case new topics were added
|
|
790
|
+
total_blocks = len(self.queue.blocks)
|
|
791
|
+
|
|
792
|
+
self._log_researching_progress(
|
|
793
|
+
"block_completed",
|
|
794
|
+
block_id=block.block_id,
|
|
795
|
+
sub_topic=block.sub_topic,
|
|
796
|
+
iterations=result.get("iterations", 0),
|
|
797
|
+
tools_used=result.get("tools_used", []),
|
|
798
|
+
queries_used=result.get("queries_used", []),
|
|
799
|
+
current_block=completed_blocks,
|
|
800
|
+
total_blocks=total_blocks,
|
|
801
|
+
)
|
|
802
|
+
|
|
803
|
+
# Display statistics
|
|
804
|
+
manager.get_queue_status()
|
|
805
|
+
|
|
806
|
+
stats = self.queue.get_statistics()
|
|
807
|
+
self._log_researching_progress(
|
|
808
|
+
"researching_completed",
|
|
809
|
+
completed_blocks=stats["completed"],
|
|
810
|
+
total_tool_calls=stats["total_tool_calls"],
|
|
811
|
+
)
|
|
812
|
+
|
|
813
|
+
self.logger.success("\nPhase 2 Completed:")
|
|
814
|
+
self.logger.info(f" - Completed Topics: {stats['completed']}")
|
|
815
|
+
self.logger.info(f" - Total Tool Calls: {stats['total_tool_calls']}")
|
|
816
|
+
|
|
817
|
+
async def _phase2_researching_parallel(self):
|
|
818
|
+
"""
|
|
819
|
+
Phase 2: Dynamic Research Loop (Parallel Mode)
|
|
820
|
+
Executes multiple topic blocks in parallel with concurrency limit
|
|
821
|
+
"""
|
|
822
|
+
# Initialize researching stage event list
|
|
823
|
+
if "researching" not in self._stage_events:
|
|
824
|
+
self._stage_events["researching"] = []
|
|
825
|
+
|
|
826
|
+
manager = self.agents["manager"]
|
|
827
|
+
research = self.agents["research"]
|
|
828
|
+
|
|
829
|
+
# Get configuration
|
|
830
|
+
max_parallel = self.config.get("researching", {}).get("max_parallel_topics", 5)
|
|
831
|
+
semaphore = asyncio.Semaphore(max_parallel)
|
|
832
|
+
|
|
833
|
+
# Get all pending blocks at the start
|
|
834
|
+
from src.agents.research.data_structures import TopicStatus
|
|
835
|
+
|
|
836
|
+
pending_blocks = [b for b in self.queue.blocks if b.status == TopicStatus.PENDING]
|
|
837
|
+
total_blocks = len(self.queue.blocks)
|
|
838
|
+
|
|
839
|
+
self.logger.info(
|
|
840
|
+
f"\n🚀 Starting parallel research mode (max {max_parallel} concurrent topics)"
|
|
841
|
+
)
|
|
842
|
+
self._log_researching_progress(
|
|
843
|
+
"researching_started",
|
|
844
|
+
total_blocks=total_blocks,
|
|
845
|
+
execution_mode="parallel",
|
|
846
|
+
max_parallel=max_parallel,
|
|
847
|
+
initial_pending=len(pending_blocks),
|
|
848
|
+
)
|
|
849
|
+
|
|
850
|
+
# Track completed blocks
|
|
851
|
+
completed_count = {"value": 0} # Use dict to allow modification in nested function
|
|
852
|
+
|
|
853
|
+
# Create async wrappers for thread-safe operations in parallel mode
|
|
854
|
+
class AsyncCitationManagerWrapper:
|
|
855
|
+
"""Wrapper to use async citation manager methods in parallel mode"""
|
|
856
|
+
|
|
857
|
+
def __init__(self, cm):
|
|
858
|
+
self._cm = cm
|
|
859
|
+
|
|
860
|
+
async def add_citation(self, citation_id, tool_type, tool_trace, raw_answer):
|
|
861
|
+
return await self._cm.add_citation_async(
|
|
862
|
+
citation_id, tool_type, tool_trace, raw_answer
|
|
863
|
+
)
|
|
864
|
+
|
|
865
|
+
def __getattr__(self, name):
|
|
866
|
+
# Forward other attributes to original citation_manager
|
|
867
|
+
return getattr(self._cm, name)
|
|
868
|
+
|
|
869
|
+
class AsyncManagerAgentWrapper:
|
|
870
|
+
"""Wrapper to use async manager agent methods in parallel mode"""
|
|
871
|
+
|
|
872
|
+
def __init__(self, ma):
|
|
873
|
+
self._ma = ma
|
|
874
|
+
|
|
875
|
+
async def add_new_topic(self, sub_topic, overview):
|
|
876
|
+
return await self._ma.add_new_topic_async(sub_topic, overview)
|
|
877
|
+
|
|
878
|
+
def __getattr__(self, name):
|
|
879
|
+
# Forward other attributes to original manager_agent
|
|
880
|
+
return getattr(self._ma, name)
|
|
881
|
+
|
|
882
|
+
async_citation_manager = AsyncCitationManagerWrapper(self.citation_manager)
|
|
883
|
+
async_manager_agent = AsyncManagerAgentWrapper(manager)
|
|
884
|
+
|
|
885
|
+
# Track active tasks for parallel progress display
|
|
886
|
+
active_tasks: dict[str, dict[str, Any]] = {} # block_id -> task info
|
|
887
|
+
active_tasks_lock = asyncio.Lock()
|
|
888
|
+
|
|
889
|
+
async def update_active_task(block_id: str, info: dict[str, Any] | None):
|
|
890
|
+
"""Update active task info (thread-safe)"""
|
|
891
|
+
async with active_tasks_lock:
|
|
892
|
+
if info is None:
|
|
893
|
+
active_tasks.pop(block_id, None)
|
|
894
|
+
else:
|
|
895
|
+
active_tasks[block_id] = info
|
|
896
|
+
# Send parallel status update
|
|
897
|
+
self._log_researching_progress(
|
|
898
|
+
"parallel_status_update",
|
|
899
|
+
active_tasks=list(active_tasks.values()),
|
|
900
|
+
active_count=len(active_tasks),
|
|
901
|
+
completed_count=completed_count["value"],
|
|
902
|
+
total_blocks=total_blocks,
|
|
903
|
+
)
|
|
904
|
+
|
|
905
|
+
async def research_single_block(block: Any) -> dict[str, Any] | None:
|
|
906
|
+
"""
|
|
907
|
+
Research a single topic block with semaphore control
|
|
908
|
+
|
|
909
|
+
Args:
|
|
910
|
+
block: TopicBlock to research
|
|
911
|
+
|
|
912
|
+
Returns:
|
|
913
|
+
Research result or None if failed
|
|
914
|
+
"""
|
|
915
|
+
async with semaphore:
|
|
916
|
+
try:
|
|
917
|
+
# Mark as researching (thread-safe)
|
|
918
|
+
async with manager._lock:
|
|
919
|
+
# Refresh block status from queue
|
|
920
|
+
current_block = self.queue.get_block_by_id(block.block_id)
|
|
921
|
+
if current_block and current_block.status == TopicStatus.PENDING:
|
|
922
|
+
self.queue.mark_researching(block.block_id)
|
|
923
|
+
|
|
924
|
+
# Add to active tasks
|
|
925
|
+
await update_active_task(
|
|
926
|
+
block.block_id,
|
|
927
|
+
{
|
|
928
|
+
"block_id": block.block_id,
|
|
929
|
+
"sub_topic": block.sub_topic,
|
|
930
|
+
"status": "starting",
|
|
931
|
+
"iteration": 0,
|
|
932
|
+
"current_tool": None,
|
|
933
|
+
"current_query": None,
|
|
934
|
+
},
|
|
935
|
+
)
|
|
936
|
+
|
|
937
|
+
self._log_researching_progress(
|
|
938
|
+
"block_started",
|
|
939
|
+
block_id=block.block_id,
|
|
940
|
+
sub_topic=block.sub_topic,
|
|
941
|
+
execution_mode="parallel",
|
|
942
|
+
active_count=len(active_tasks),
|
|
943
|
+
)
|
|
944
|
+
|
|
945
|
+
if self.logger:
|
|
946
|
+
self.logger.info(
|
|
947
|
+
f"\n[{block.block_id}] 🔍 Starting research: {block.sub_topic}"
|
|
948
|
+
)
|
|
949
|
+
|
|
950
|
+
# Get max_iterations from config for this closure
|
|
951
|
+
config_max_iterations = self.config.get("researching", {}).get(
|
|
952
|
+
"max_iterations", 5
|
|
953
|
+
)
|
|
954
|
+
|
|
955
|
+
# Create iteration callback for parallel mode
|
|
956
|
+
def parallel_iteration_callback(event_type: str, **data):
|
|
957
|
+
"""Handle iteration progress in parallel mode"""
|
|
958
|
+
# Update active task info
|
|
959
|
+
task_info = {
|
|
960
|
+
"block_id": block.block_id,
|
|
961
|
+
"sub_topic": block.sub_topic,
|
|
962
|
+
"status": event_type,
|
|
963
|
+
"iteration": data.get("iteration", 0),
|
|
964
|
+
"max_iterations": data.get("max_iterations", config_max_iterations),
|
|
965
|
+
"current_tool": data.get("tool_type"),
|
|
966
|
+
"current_query": data.get("query"),
|
|
967
|
+
"tools_used": data.get("tools_used", []),
|
|
968
|
+
}
|
|
969
|
+
# Schedule async update
|
|
970
|
+
asyncio.create_task(update_active_task(block.block_id, task_info))
|
|
971
|
+
|
|
972
|
+
# Also log the detailed progress
|
|
973
|
+
self._log_researching_progress(
|
|
974
|
+
event_type,
|
|
975
|
+
block_id=block.block_id,
|
|
976
|
+
sub_topic=block.sub_topic,
|
|
977
|
+
execution_mode="parallel",
|
|
978
|
+
**data,
|
|
979
|
+
)
|
|
980
|
+
|
|
981
|
+
# Execute research loop with async wrappers
|
|
982
|
+
result = await research.process(
|
|
983
|
+
topic_block=block,
|
|
984
|
+
call_tool_callback=self._call_tool,
|
|
985
|
+
note_agent=self.agents["note"],
|
|
986
|
+
citation_manager=async_citation_manager,
|
|
987
|
+
queue=self.queue,
|
|
988
|
+
manager_agent=async_manager_agent,
|
|
989
|
+
config=self.config,
|
|
990
|
+
progress_callback=parallel_iteration_callback,
|
|
991
|
+
)
|
|
992
|
+
|
|
993
|
+
# Mark as completed (thread-safe)
|
|
994
|
+
await manager.complete_task_async(block.block_id)
|
|
995
|
+
completed_count["value"] += 1
|
|
996
|
+
|
|
997
|
+
# Remove from active tasks
|
|
998
|
+
await update_active_task(block.block_id, None)
|
|
999
|
+
|
|
1000
|
+
self._log_researching_progress(
|
|
1001
|
+
"block_completed",
|
|
1002
|
+
block_id=block.block_id,
|
|
1003
|
+
sub_topic=block.sub_topic,
|
|
1004
|
+
iterations=result.get("iterations", 0),
|
|
1005
|
+
tools_used=result.get("tools_used", []),
|
|
1006
|
+
queries_used=result.get("queries_used", []),
|
|
1007
|
+
current_block=completed_count["value"],
|
|
1008
|
+
total_blocks=total_blocks,
|
|
1009
|
+
execution_mode="parallel",
|
|
1010
|
+
)
|
|
1011
|
+
|
|
1012
|
+
if self.logger:
|
|
1013
|
+
self.logger.success(f"[{block.block_id}] ✓ Completed: {block.sub_topic}")
|
|
1014
|
+
|
|
1015
|
+
return result
|
|
1016
|
+
|
|
1017
|
+
except Exception as e:
|
|
1018
|
+
# Mark as failed (thread-safe)
|
|
1019
|
+
await manager.fail_task_async(block.block_id, str(e))
|
|
1020
|
+
completed_count["value"] += 1
|
|
1021
|
+
|
|
1022
|
+
# Remove from active tasks
|
|
1023
|
+
await update_active_task(block.block_id, None)
|
|
1024
|
+
|
|
1025
|
+
if self.logger:
|
|
1026
|
+
self.logger.error(f"[{block.block_id}] ✗ Failed: {block.sub_topic} - {e}")
|
|
1027
|
+
|
|
1028
|
+
self._log_researching_progress(
|
|
1029
|
+
"block_failed",
|
|
1030
|
+
block_id=block.block_id,
|
|
1031
|
+
sub_topic=block.sub_topic,
|
|
1032
|
+
error=str(e),
|
|
1033
|
+
execution_mode="parallel",
|
|
1034
|
+
)
|
|
1035
|
+
return None
|
|
1036
|
+
|
|
1037
|
+
# Execute all research tasks in parallel
|
|
1038
|
+
tasks = [research_single_block(block) for block in pending_blocks]
|
|
1039
|
+
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
1040
|
+
|
|
1041
|
+
# Handle any exceptions that weren't caught
|
|
1042
|
+
for i, result in enumerate(results):
|
|
1043
|
+
if isinstance(result, Exception):
|
|
1044
|
+
block = pending_blocks[i]
|
|
1045
|
+
await manager.fail_task_async(block.block_id, str(result))
|
|
1046
|
+
if self.logger:
|
|
1047
|
+
self.logger.error(f"[{block.block_id}] ✗ Exception: {result}")
|
|
1048
|
+
|
|
1049
|
+
        # Wait for any dynamically added topics (if the manager adds new topics during research)
        # Continue until all tasks are processed (completed or failed)
        max_wait_iterations = 100  # Prevent an infinite loop
        wait_count = 0

        while True:
            # Check if all blocks are processed (COMPLETED or FAILED, not PENDING or RESEARCHING)
            stats = self.queue.get_statistics()
            pending_count = stats.get("pending", 0)
            researching_count = stats.get("researching", 0)

            # Exit if no pending or researching tasks remain
            if pending_count == 0 and researching_count == 0:
                break

            # Get any newly added pending blocks
            new_pending = [b for b in self.queue.blocks if b.status == TopicStatus.PENDING]
            if not new_pending:
                # No pending blocks, but some may still be researching;
                # wait briefly for them to complete
                wait_count += 1
                if wait_count > max_wait_iterations:
                    self.logger.warning(
                        "Max wait iterations reached, exiting parallel research loop"
                    )
                    break
                await asyncio.sleep(0.1)
                continue

            # Reset the wait count when there is new work
            wait_count = 0

            # Research the newly added blocks
            new_tasks = [research_single_block(block) for block in new_pending]
            new_results = await asyncio.gather(*new_tasks, return_exceptions=True)

            for i, result in enumerate(new_results):
                if isinstance(result, Exception):
                    block = new_pending[i]
                    await manager.fail_task_async(block.block_id, str(result))

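        # The loop above polls rather than awaiting a completion signal: idle
        # waiting is capped at max_wait_iterations * 0.1 s (about 10 seconds),
        # and the counter resets whenever new PENDING blocks appear, so only a
        # queue that stays quiet for the full window triggers the early exit.
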
        stats = self.queue.get_statistics()
        self._log_researching_progress(
            "researching_completed",
            completed_blocks=stats["completed"],
            total_tool_calls=stats["total_tool_calls"],
            execution_mode="parallel",
        )

        self.logger.success("\nPhase 2 Completed (Parallel Mode):")
        self.logger.info(f"  - Completed Topics: {stats['completed']}")
        self.logger.info(f"  - Total Tool Calls: {stats['total_tool_calls']}")
        self.logger.info(f"  - Failed Topics: {stats.get('failed', 0)}")

    def _log_researching_progress(self, status: str, **payload: Any) -> None:
        """Record researching stage progress (thread-safe for parallel mode)"""
        event = {"status": status, "timestamp": datetime.now().isoformat()}
        event.update({k: v for k, v in payload.items() if v is not None})

        # Use a lock to prevent concurrent file writes in parallel mode
        with self._progress_file_lock:
            if "researching" not in self._stage_events:
                self._stage_events["researching"] = []
            self._stage_events["researching"].append(event)

            # Save to file
            research_progress_file = self.cache_dir / "researching_progress.json"
            context = {
                "research_id": self.research_id,
                "stage": "researching",
                "input_topic": self.input_topic,
                "optimized_topic": self.optimized_topic,
                "events": self._stage_events["researching"],
            }
            with open(research_progress_file, "w", encoding="utf-8") as f:
                json.dump(context, f, ensure_ascii=False, indent=2)

        # Send progress via the callback
        if self.progress_callback:
            try:
                progress_event = {
                    "type": "progress",
                    "stage": "researching",
                    "status": status,
                    "research_id": self.research_id,
                    **{k: v for k, v in payload.items() if v is not None},
                }
                self.progress_callback(progress_event)
            except Exception as e:
                self.logger.warning(f"Progress callback failed: {e}")

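    # The researching_progress.json written above has roughly this shape
    # (values illustrative):
    #   {
    #     "research_id": "...", "stage": "researching",
    #     "input_topic": "...", "optimized_topic": "...",
    #     "events": [{"status": "block_completed",
    #                 "timestamp": "2025-01-01T12:00:00",
    #                 "block_id": "b1", "execution_mode": "parallel", ...}]
    #   }
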
    def _create_iteration_progress_callback(
        self,
        block_id: str,
        sub_topic: str,
        execution_mode: str,
        current_block: int | None = None,
        total_blocks: int | None = None,
    ) -> Callable:
        """
        Create a progress callback for research iterations

        Args:
            block_id: Block ID for the current topic
            sub_topic: Current sub-topic being researched
            execution_mode: 'series' or 'parallel'
            current_block: Current block number (for series mode)
            total_blocks: Total number of blocks

        Returns:
            Callback function for iteration progress
        """

        def iteration_callback(event_type: str, **data: Any):
            """Callback for iteration progress events"""
            payload = {
                "block_id": block_id,
                "sub_topic": sub_topic,
                "execution_mode": execution_mode,
            }
            if current_block is not None:
                payload["current_block"] = current_block
            if total_blocks is not None:
                payload["total_blocks"] = total_blocks
            payload.update(data)

            self._log_researching_progress(event_type, **payload)

        return iteration_callback

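    # Minimal usage sketch for the factory above (event name and extra fields
    # are illustrative; the real ones come from the research agent):
    #   cb = self._create_iteration_progress_callback(
    #       block_id="b1", sub_topic="...", execution_mode="parallel"
    #   )
    #   cb("iteration_started", iteration=1)  # routed to _log_researching_progress
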
    async def _phase3_reporting(self, topic: str) -> dict[str, Any]:
        """
        Phase 3: Report Generation

        Args:
            topic: Research topic

        Returns:
            Report result
        """
        reporting = self.agents["reporting"]

        # Set the citation manager
        reporting.set_citation_manager(self.citation_manager)

        # Generate the report
        report_result = await reporting.process(
            self.queue, topic, progress_callback=self._report_progress_callback
        )

        self.logger.success("\nPhase 3 Completed:")
        self.logger.info(f"  - Report Word Count: {report_result['word_count']}")
        self.logger.info(f"  - Sections: {report_result['sections']}")
        self.logger.info(f"  - Citations: {report_result['citations']}")

        return report_result

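    # The generic logger below serves the planning and reporting stages:
    # "planning" events go to plan_progress_file and everything else to
    # report_progress_file, while researching events use the dedicated
    # lock-protected writer above.
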
    def _log_progress(self, stage: str, status: str, **payload: Any) -> None:
        """Record stage progress to a JSON file and send progress via callback"""
        if stage not in self._stage_events:
            return
        event = {"status": status, "timestamp": datetime.now().isoformat()}
        event.update({k: v for k, v in payload.items() if v is not None})
        self._stage_events[stage].append(event)
        file_path = self.plan_progress_file if stage == "planning" else self.report_progress_file
        context = {
            "research_id": self.research_id,
            "stage": stage,
            "input_topic": self.input_topic,
            "optimized_topic": self.optimized_topic,
            "events": self._stage_events[stage],
        }
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(context, f, ensure_ascii=False, indent=2)

        # Send progress via the callback (if a callback function is set)
        if self.progress_callback:
            try:
                progress_event = {
                    "type": "progress",
                    "stage": stage,
                    "status": status,
                    "research_id": self.research_id,
                    **{k: v for k, v in payload.items() if v is not None},
                }
                self.progress_callback(progress_event)
            except Exception as e:
                self.logger.warning(f"Progress callback failed: {e}")

    def _report_progress_callback(self, event: dict[str, Any]) -> None:
        """Reporting stage progress callback"""
        status = event.pop("status", "unknown")
        self._log_progress("reporting", status, **event)


async def main():
    """Main function"""
    import argparse

    import yaml
    from dotenv import load_dotenv

    from src.services.llm import get_llm_config

    # Load environment variables
    load_dotenv()

    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="DR-in-KG 2.0 - Deep Research System")
    parser.add_argument("--topic", type=str, required=True, help="Research topic")
    parser.add_argument("--config", type=str, default="config.yaml", help="Configuration file")
    parser.add_argument(
        "--preset", type=str, choices=["quick", "medium", "deep", "auto"], help="Preset mode"
    )

    args = parser.parse_args()

    # Load configuration
    config_path = PROJECT_ROOT / args.config
    if not config_path.exists():
        logger = get_logger("Research")
        logger.error(f"Configuration file not found: {config_path}")
        sys.exit(1)

    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Apply preset
    if args.preset and "presets" in config and args.preset in config["presets"]:
        preset = config["presets"][args.preset]
        # Merge preset configuration
        for key, value in preset.items():
            if key in config and isinstance(value, dict):
                config[key].update(value)

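    # Note: this preset merge is one level deep. For example, given
    #   presets: {quick: {research: {max_iterations: 2}}}
    # config["research"] is updated key by key, but any nested sub-dict inside
    # it is replaced wholesale, and preset keys missing from config (or whose
    # values are not dicts) are silently ignored.
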
    # Get the LLM configuration
    llm_config = get_llm_config()

    # Create the research pipeline
    pipeline = ResearchPipeline(
        config=config, api_key=llm_config.api_key, base_url=llm_config.base_url
    )

    # Execute the research
    result = await pipeline.run(args.topic)

    logger = get_logger("Research")
    logger.success("\n🎉 Research completed!")
    logger.info(f"Report location: {result['final_report_path']}")


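# On Windows, asyncio defaults to the proactor event loop (Python 3.8+); the
# selector policy is forced below because some third-party libraries still
# rely on loop.add_reader()/add_writer(), which the proactor loop does not
# support for arbitrary sockets.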
if __name__ == "__main__":
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    asyncio.run(main())


# Backward-compatibility alias: legacy code importing ResearchPipeline keeps
# working, and newer code that references ResearchPipeline2 resolves to the
# same class.
ResearchPipeline2 = ResearchPipeline
__all__ = ["ResearchPipeline", "ResearchPipeline2"]
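
# Example invocation (illustrative; the script path and topic are
# placeholders, and the installed package also exposes console entry points):
#   python research_pipeline.py --topic "Graph-based retrieval" --preset quick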