realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
InvestigateAgent - Investigator
|
|
4
|
+
Generates query actions and calls tools based on current memory and reflections.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
# Add project root to path
|
|
12
|
+
project_root = Path(__file__).parent.parent.parent.parent
|
|
13
|
+
sys.path.insert(0, str(project_root))
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
|
|
17
|
+
from src.agents.base_agent import BaseAgent
|
|
18
|
+
from src.tools import query_numbered_item, rag_search, web_search
|
|
19
|
+
|
|
20
|
+
from ..memory import CitationMemory, InvestigateMemory, KnowledgeItem
|
|
21
|
+
from ..utils.json_utils import extract_json_from_text
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class InvestigateAgent(BaseAgent):
|
|
25
|
+
"""Investigator Agent - Generates queries and calls tools"""
|
|
26
|
+
|
|
27
|
+
def __init__(
    self,
    config: dict[str, Any],
    api_key: str,
    base_url: str,
    api_version: str | None = None,
    token_tracker=None,
):
    """Set up the investigator agent on top of ``BaseAgent``.

    Args:
        config: Full configuration mapping. The keys read here are
            ``system.language``, ``tools.web_search.enabled`` and
            ``solve.agents.investigate_agent``.
        api_key: Forwarded unchanged to ``BaseAgent.__init__``.
        base_url: Forwarded unchanged to ``BaseAgent.__init__``.
        api_version: Forwarded unchanged to ``BaseAgent.__init__``.
        token_tracker: Forwarded unchanged to ``BaseAgent.__init__``.
    """
    system_section = config.get("system", {})
    super().__init__(
        module_name="solve",
        agent_name="investigate_agent",
        api_key=api_key,
        base_url=base_url,
        api_version=api_version,
        language=system_section.get("language", "zh"),
        config=config,
        token_tracker=token_tracker,
    )

    # Web search counts as enabled unless tools.web_search.enabled says otherwise.
    web_search_section = config.get("tools", {}).get("web_search", {})
    self.enable_web_search = web_search_section.get("enabled", True)

    # Per-agent limits are read from solve.agents.investigate_agent.
    own_section = config.get("solve", {}).get("agents", {}).get("investigate_agent", {})
    self.max_actions_per_round = own_section.get("max_actions_per_round", 1)
    self.max_iterations = own_section.get("max_iterations", 3)
|
|
53
|
+
|
|
54
|
+
async def process(
    self,
    question: str,
    memory: InvestigateMemory,
    citation_memory: CitationMemory,
    kb_name: str = "ai_textbook",
    output_dir: str | None = None,
    verbose: bool = True,
) -> dict[str, Any]:
    """
    Run one investigation round: ask the LLM for a tool plan, then execute it.

    Args:
        question: User question
        memory: Investigation memory; knowledge items produced in this round
            are added to it
        citation_memory: Citation memory (for registering citations)
        kb_name: Knowledge base name
        output_dir: Output directory; ``memory.save()`` is only called when
            this is set and at least one knowledge item was produced
        verbose: Forwarded to the LLM call

    Returns:
        dict: Investigation result
            {
                'reasoning': str,
                'should_stop': bool,
                'knowledge_item_ids': List[str],
                'actions': List[Dict[str, Any]]
            }

    Raises:
        ValueError: If ``citation_memory`` is None.
    """
    if citation_memory is None:
        raise ValueError(
            "citation_memory cannot be None, InvestigateAgent needs it for citation registration"
        )

    # 1. Build context
    context = self._build_context(question, memory)

    # 2. Build prompts
    system_prompt = self._build_system_prompt()
    user_prompt = self._build_user_prompt(context)

    # 3. Call LLM
    response = await self.call_llm(
        user_prompt=user_prompt,
        system_prompt=system_prompt,
        verbose=verbose,
        response_format={"type": "json_object"},
    )

    # 4. Parse output (JSON)
    parsed_result = extract_json_from_text(response)

    if not parsed_result or not isinstance(parsed_result, dict):
        self.logger.warning("Parse failed: LLM did not return valid JSON")
        return {
            "reasoning": "Parse failed: invalid JSON",
            "should_stop": True,
            "knowledge_item_ids": [],
            "actions": [],
        }

    reasoning = parsed_result.get("reasoning", "")
    tool_plans = parsed_result.get("plan", [])

    # Ensure tool_plans is a list (handle case where LLM returns dict instead of list)
    if isinstance(tool_plans, dict):
        self.logger.warning("Parse warning: 'plan' field is a dict, wrapping in list")
        tool_plans = [tool_plans]
    elif not isinstance(tool_plans, list):
        self.logger.warning(
            "Parse warning: 'plan' field is not a list or dict, using empty list"
        )
        tool_plans = []

    # Every entry is read with .get() below; a stray string or number in the
    # list would raise AttributeError, so keep only dict entries.
    dict_plans = [plan for plan in tool_plans if isinstance(plan, dict)]
    dropped = len(tool_plans) - len(dict_plans)
    if dropped:
        self.logger.warning(f"Parse warning: ignored {dropped} non-dict entries in 'plan'")
    tool_plans = dict_plans

    # 5. Determine if should stop: no usable plan, or any entry asks for "none"
    should_stop = not tool_plans or any(plan.get("tool") == "none" for plan in tool_plans)

    if should_stop:
        return {
            "reasoning": reasoning,
            "should_stop": True,
            "knowledge_item_ids": [],
            "actions": [],
        }

    # 6. Execute multiple tool calls (limited by max_actions_per_round)
    knowledge_ids: list[str] = []
    executed_actions: list[dict[str, Any]] = []

    for plan in tool_plans[: self.max_actions_per_round]:
        tool_type = plan.get("tool")
        # A "none" tool cannot reach this loop (it stops the round above), so
        # only entries with a missing/empty tool need skipping here.
        if not tool_type:
            continue

        query = plan.get("query", "")
        identifier = plan.get("identifier")

        knowledge_item = await self._execute_single_action(
            tool_selection=tool_type,
            query=query,
            identifier=identifier,
            kb_name=kb_name,
            output_dir=output_dir,
            citation_memory=citation_memory,
        )

        # The action is recorded even when the tool call produced nothing.
        executed_actions.append(
            {
                "tool_type": tool_type,
                "query": query,
                "identifier": identifier,
                "cite_id": knowledge_item.cite_id if knowledge_item else None,
            }
        )

        if knowledge_item:
            memory.add_knowledge(knowledge_item)
            knowledge_ids.append(knowledge_item.cite_id)

    if knowledge_ids and output_dir:
        memory.save()

    # 7. Return results
    return {
        "reasoning": reasoning,
        "should_stop": False,
        "knowledge_item_ids": knowledge_ids,
        "actions": executed_actions,
    }
|
|
198
|
+
|
|
199
|
+
def _build_context(self, question: str, memory: InvestigateMemory) -> dict[str, Any]:
|
|
200
|
+
"""Build context (pass full content, no truncation)"""
|
|
201
|
+
knowledge_chain_full = []
|
|
202
|
+
for item in memory.knowledge_chain:
|
|
203
|
+
knowledge_chain_full.append(
|
|
204
|
+
{
|
|
205
|
+
"cite_id": item.cite_id,
|
|
206
|
+
"tool_type": item.tool_type,
|
|
207
|
+
"query": item.query,
|
|
208
|
+
"raw_result": item.raw_result,
|
|
209
|
+
"summary": item.summary,
|
|
210
|
+
}
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
remaining_questions_full = []
|
|
214
|
+
if memory.reflections and memory.reflections.remaining_questions:
|
|
215
|
+
remaining_questions_full = memory.reflections.remaining_questions.copy()
|
|
216
|
+
knowledge_chain_summary = (
|
|
217
|
+
"\n".join(
|
|
218
|
+
f"- {item.cite_id} ({item.tool_type}): {item.summary or item.raw_result[:200]}"
|
|
219
|
+
for item in memory.knowledge_chain
|
|
220
|
+
)
|
|
221
|
+
if memory.knowledge_chain
|
|
222
|
+
else "(none)"
|
|
223
|
+
)
|
|
224
|
+
reflections_summary = (
|
|
225
|
+
"\n".join(f"- {q}" for q in remaining_questions_full)
|
|
226
|
+
if remaining_questions_full
|
|
227
|
+
else "(no remaining questions)"
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
return {
|
|
231
|
+
"question": question,
|
|
232
|
+
"num_knowledge": len(memory.knowledge_chain),
|
|
233
|
+
"knowledge_chain_full": knowledge_chain_full,
|
|
234
|
+
"knowledge_chain_summary": knowledge_chain_summary,
|
|
235
|
+
"reflections_summary": reflections_summary,
|
|
236
|
+
"remaining_questions": remaining_questions_full,
|
|
237
|
+
"action_queue": "(no action history)",
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
def _build_system_prompt(self) -> str:
|
|
241
|
+
"""Build system prompt"""
|
|
242
|
+
prompt = self.get_prompt("system") if self.has_prompts() else None
|
|
243
|
+
if not prompt:
|
|
244
|
+
raise ValueError(
|
|
245
|
+
"InvestigateAgent missing system prompt. Configure in src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml"
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
# If web_search is disabled, remove web_search related content from prompt
|
|
249
|
+
if not self.enable_web_search:
|
|
250
|
+
# Get the web_search disabled prompt if available, otherwise filter out web_search lines
|
|
251
|
+
web_search_disabled_prompt = (
|
|
252
|
+
self.get_prompt("web_search_disabled") if self.has_prompts() else None
|
|
253
|
+
)
|
|
254
|
+
if web_search_disabled_prompt:
|
|
255
|
+
# Replace web_search description with disabled message
|
|
256
|
+
prompt = prompt.replace(
|
|
257
|
+
self.get_prompt("web_search_description") or "", web_search_disabled_prompt
|
|
258
|
+
)
|
|
259
|
+
else:
|
|
260
|
+
# Simple filter: remove lines containing web_search tool description
|
|
261
|
+
lines = prompt.split("\n")
|
|
262
|
+
filtered_lines = []
|
|
263
|
+
for line in lines:
|
|
264
|
+
# Skip lines that describe web_search as an available tool
|
|
265
|
+
if "`web_search`" in line and (
|
|
266
|
+
"Use Sparingly" in line or "latest news" in line or "Web Search" in line
|
|
267
|
+
):
|
|
268
|
+
continue
|
|
269
|
+
# Also remove web_search from tool list in output format
|
|
270
|
+
if "web_search" in line and (
|
|
271
|
+
"rag_naive | rag_hybrid |" in line or 'tool":' in line
|
|
272
|
+
):
|
|
273
|
+
line = (
|
|
274
|
+
line.replace(" | web_search", "")
|
|
275
|
+
.replace("| web_search", "")
|
|
276
|
+
.replace("web_search |", "")
|
|
277
|
+
.replace("web_search", "")
|
|
278
|
+
)
|
|
279
|
+
filtered_lines.append(line)
|
|
280
|
+
prompt = "\n".join(filtered_lines)
|
|
281
|
+
|
|
282
|
+
return prompt
|
|
283
|
+
|
|
284
|
+
def _build_user_prompt(self, context: dict[str, Any]) -> str:
|
|
285
|
+
"""Build user prompt (pass full content)"""
|
|
286
|
+
template = self.get_prompt("user_template") if self.has_prompts() else None
|
|
287
|
+
if not template:
|
|
288
|
+
raise ValueError(
|
|
289
|
+
"InvestigateAgent missing user prompt template. Configure in prompts/en/analysis_loop/investigate_agent.yaml"
|
|
290
|
+
)
|
|
291
|
+
return template.format(**context)
|
|
292
|
+
|
|
293
|
+
async def _execute_single_action(
|
|
294
|
+
self,
|
|
295
|
+
tool_selection: str,
|
|
296
|
+
query: str,
|
|
297
|
+
identifier: str | None,
|
|
298
|
+
kb_name: str,
|
|
299
|
+
output_dir: str | None,
|
|
300
|
+
citation_memory: CitationMemory,
|
|
301
|
+
) -> KnowledgeItem | None:
|
|
302
|
+
"""Execute a single tool call"""
|
|
303
|
+
import time
|
|
304
|
+
|
|
305
|
+
start_time = time.time()
|
|
306
|
+
tool_input = {"query": query, "identifier": identifier, "kb_name": kb_name}
|
|
307
|
+
|
|
308
|
+
try:
|
|
309
|
+
if tool_selection == "rag_naive":
|
|
310
|
+
result = await self._call_rag_naive(query, kb_name, output_dir)
|
|
311
|
+
raw_result = result.get("answer", "")
|
|
312
|
+
|
|
313
|
+
elif tool_selection == "rag_hybrid":
|
|
314
|
+
result = await self._call_rag_hybrid(query, kb_name, output_dir)
|
|
315
|
+
raw_result = result.get("answer", "")
|
|
316
|
+
|
|
317
|
+
elif tool_selection == "web_search":
|
|
318
|
+
# Check if web_search is enabled
|
|
319
|
+
if not self.enable_web_search:
|
|
320
|
+
self.logger.warning(
|
|
321
|
+
"Tool call rejected (web_search): web_search is disabled in config"
|
|
322
|
+
)
|
|
323
|
+
return None
|
|
324
|
+
result = await self._call_web_search(query, output_dir)
|
|
325
|
+
raw_result = json.dumps(result, ensure_ascii=False, indent=2)
|
|
326
|
+
|
|
327
|
+
elif tool_selection == "query_item":
|
|
328
|
+
identifier_to_use = identifier or query
|
|
329
|
+
|
|
330
|
+
if (
|
|
331
|
+
not identifier_to_use
|
|
332
|
+
or not isinstance(identifier_to_use, str)
|
|
333
|
+
or not identifier_to_use.strip()
|
|
334
|
+
):
|
|
335
|
+
self.logger.warning(
|
|
336
|
+
"Tool call failed (query_item): identifier is empty or invalid"
|
|
337
|
+
)
|
|
338
|
+
return None
|
|
339
|
+
|
|
340
|
+
result = await self._call_query_item(identifier_to_use, kb_name)
|
|
341
|
+
raw_result = result.get("content", result.get("answer", ""))
|
|
342
|
+
|
|
343
|
+
else:
|
|
344
|
+
self.logger.warning(f"Unknown tool type: {tool_selection}")
|
|
345
|
+
return None
|
|
346
|
+
|
|
347
|
+
elapsed_ms = (time.time() - start_time) * 1000
|
|
348
|
+
|
|
349
|
+
# Create and register citation
|
|
350
|
+
cite_id = citation_memory.add_citation(
|
|
351
|
+
tool_type=tool_selection,
|
|
352
|
+
query=query,
|
|
353
|
+
raw_result=raw_result,
|
|
354
|
+
stage="analysis",
|
|
355
|
+
metadata={"identifier": identifier},
|
|
356
|
+
)
|
|
357
|
+
citation_memory.save()
|
|
358
|
+
|
|
359
|
+
# Log tool call
|
|
360
|
+
self.logger.log_tool_call(
|
|
361
|
+
tool_name=tool_selection,
|
|
362
|
+
tool_input=tool_input,
|
|
363
|
+
tool_output=result,
|
|
364
|
+
status="success",
|
|
365
|
+
elapsed_ms=elapsed_ms,
|
|
366
|
+
citation_id=cite_id,
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
# Create knowledge item
|
|
370
|
+
knowledge_item = KnowledgeItem(
|
|
371
|
+
cite_id=cite_id,
|
|
372
|
+
tool_type=tool_selection,
|
|
373
|
+
query=query,
|
|
374
|
+
raw_result=raw_result,
|
|
375
|
+
summary="", # Generated by NoteAgent
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
return knowledge_item
|
|
379
|
+
|
|
380
|
+
except Exception as e:
|
|
381
|
+
elapsed_ms = (time.time() - start_time) * 1000
|
|
382
|
+
error_msg = str(e)
|
|
383
|
+
|
|
384
|
+
self.logger.log_tool_call(
|
|
385
|
+
tool_name=tool_selection,
|
|
386
|
+
tool_input=tool_input,
|
|
387
|
+
tool_output=error_msg,
|
|
388
|
+
status="failed",
|
|
389
|
+
elapsed_ms=elapsed_ms,
|
|
390
|
+
error=error_msg,
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
self.logger.warning(f"Tool call failed ({tool_selection}): {e}")
|
|
394
|
+
return None
|
|
395
|
+
|
|
396
|
+
async def _call_rag_naive(
    self, query: str, kb_name: str, output_dir: str | None
) -> dict[str, Any]:
    """Run ``rag_search`` in "naive" mode; ``output_dir`` is accepted but not used."""
    naive_result = await rag_search(query=query, kb_name=kb_name, mode="naive")
    return naive_result
|
|
401
|
+
|
|
402
|
+
async def _call_rag_hybrid(
    self, query: str, kb_name: str, output_dir: str | None
) -> dict[str, Any]:
    """Run ``rag_search`` in "hybrid" mode; ``output_dir`` is accepted but not used."""
    hybrid_result = await rag_search(query=query, kb_name=kb_name, mode="hybrid")
    return hybrid_result
|
|
407
|
+
|
|
408
|
+
async def _call_web_search(self, query: str, output_dir: str | None) -> dict[str, Any]:
    """Call Web Search without blocking the event loop.

    Args:
        query: Search query.
        output_dir: Directory handed to ``web_search``; "./cache" is used
            when it is None or empty.

    Returns:
        Whatever ``web_search`` returns.
    """
    import asyncio

    # NOTE(review): web_search is called without await and its result is
    # JSON-dumped by the caller, so it is presumably a blocking function;
    # running it in a worker thread keeps other coroutines responsive.
    # Confirm it has no main-thread-only requirements.
    return await asyncio.to_thread(
        web_search, query=query, output_dir=output_dir or "./cache", verbose=False
    )
|
|
411
|
+
|
|
412
|
+
async def _call_query_item(self, identifier: str, kb_name: str) -> dict[str, Any]:
    """Look up ``identifier`` in knowledge base ``kb_name`` via ``query_numbered_item``."""
    item_lookup = query_numbered_item(identifier=identifier, kb_name=kb_name)
    return item_lookup
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
NoteAgent - Note taker
|
|
5
|
+
Based on new knowledge, generates or updates notes, annotates covered pain points and missing points
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import sys
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
# Add project root to path
|
|
13
|
+
project_root = Path(__file__).parent.parent.parent.parent
|
|
14
|
+
sys.path.insert(0, str(project_root))
|
|
15
|
+
|
|
16
|
+
from src.agents.base_agent import BaseAgent
|
|
17
|
+
|
|
18
|
+
from ..memory import CitationMemory, InvestigateMemory, KnowledgeItem
|
|
19
|
+
from ..utils import ParseError, validate_note_output
|
|
20
|
+
from ..utils.json_utils import extract_json_from_text
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class NoteAgent(BaseAgent):
|
|
24
|
+
"""Note taker Agent - Generates and updates notes"""
|
|
25
|
+
|
|
26
|
+
def __init__(
    self,
    config: dict[str, Any],
    api_key: str,
    base_url: str,
    api_version: str | None = None,
    token_tracker=None,
):
    """Set up the note-taking agent on top of ``BaseAgent``.

    Args:
        config: Full configuration mapping; ``system.language`` selects the
            prompt language and defaults to "zh".
        api_key: Forwarded unchanged to ``BaseAgent.__init__``.
        base_url: Forwarded unchanged to ``BaseAgent.__init__``.
        api_version: Forwarded unchanged to ``BaseAgent.__init__``.
        token_tracker: Forwarded unchanged to ``BaseAgent.__init__``.
    """
    system_section = config.get("system", {})
    prompt_language = system_section.get("language", "zh")

    super().__init__(
        module_name="solve",
        agent_name="note_agent",
        api_key=api_key,
        base_url=base_url,
        api_version=api_version,
        language=prompt_language,
        config=config,
        token_tracker=token_tracker,
    )
|
|
45
|
+
|
|
46
|
+
async def process(
|
|
47
|
+
self,
|
|
48
|
+
question: str,
|
|
49
|
+
memory: InvestigateMemory,
|
|
50
|
+
new_knowledge_ids: list[str],
|
|
51
|
+
citation_memory: CitationMemory | None = None,
|
|
52
|
+
output_dir: str | None = None,
|
|
53
|
+
verbose: bool = True,
|
|
54
|
+
) -> dict[str, Any]:
|
|
55
|
+
"""
|
|
56
|
+
Process note generation workflow (supports multiple knowledge_items in single round)
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
question: User question
|
|
60
|
+
memory: Investigation memory
|
|
61
|
+
new_knowledge_ids: List of cite_ids for new knowledge items
|
|
62
|
+
citation_memory: Citation memory
|
|
63
|
+
output_dir: Output directory
|
|
64
|
+
verbose: Whether to print detailed information
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
dict: Aggregated note result
|
|
68
|
+
"""
|
|
69
|
+
if isinstance(new_knowledge_ids, str): # Backward compatibility
|
|
70
|
+
target_ids = [new_knowledge_ids]
|
|
71
|
+
else:
|
|
72
|
+
target_ids = new_knowledge_ids or []
|
|
73
|
+
|
|
74
|
+
if not target_ids:
|
|
75
|
+
return {"success": False, "reason": "new_knowledge_ids is empty"}
|
|
76
|
+
|
|
77
|
+
system_prompt = self._build_system_prompt()
|
|
78
|
+
processed_details = []
|
|
79
|
+
failed_ids = []
|
|
80
|
+
|
|
81
|
+
for cite_id in target_ids:
|
|
82
|
+
knowledge_item = next((k for k in memory.knowledge_chain if k.cite_id == cite_id), None)
|
|
83
|
+
if not knowledge_item:
|
|
84
|
+
failed_ids.append({"cite_id": cite_id, "reason": "knowledge_item not found"})
|
|
85
|
+
continue
|
|
86
|
+
|
|
87
|
+
context = self._build_context(question, knowledge_item, memory)
|
|
88
|
+
user_prompt = self._build_user_prompt(context)
|
|
89
|
+
|
|
90
|
+
response = await self.call_llm(
|
|
91
|
+
user_prompt=user_prompt,
|
|
92
|
+
system_prompt=system_prompt,
|
|
93
|
+
verbose=verbose,
|
|
94
|
+
response_format={"type": "json_object"},
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
if verbose:
|
|
98
|
+
self.logger.debug(f"cite_id={cite_id} LLM output stats:")
|
|
99
|
+
self.logger.debug(f" - Total length: {len(response)} chars")
|
|
100
|
+
self.logger.debug(
|
|
101
|
+
f" - Last 100 chars: ...{response[-100:] if len(response) > 100 else response}"
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
try:
|
|
105
|
+
parsed_result = extract_json_from_text(response)
|
|
106
|
+
if not parsed_result or not isinstance(parsed_result, dict):
|
|
107
|
+
raise ParseError("LLM output is not a valid JSON object")
|
|
108
|
+
# citations is optional, default to empty list
|
|
109
|
+
if parsed_result.get("citations") is None:
|
|
110
|
+
parsed_result["citations"] = []
|
|
111
|
+
elif not isinstance(parsed_result.get("citations"), list):
|
|
112
|
+
raise ParseError("citations field must be a list")
|
|
113
|
+
validate_note_output(parsed_result)
|
|
114
|
+
if verbose:
|
|
115
|
+
summary_len = len(parsed_result.get("summary", ""))
|
|
116
|
+
print(f"📝 [NoteAgent] cite_id={cite_id} summary length: {summary_len}")
|
|
117
|
+
except ParseError as e:
|
|
118
|
+
failed_ids.append({"cite_id": cite_id, "reason": str(e)})
|
|
119
|
+
continue
|
|
120
|
+
except Exception as e:
|
|
121
|
+
failed_ids.append({"cite_id": cite_id, "reason": str(e)})
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
citations = parsed_result.get("citations", [])
|
|
125
|
+
memory.update_knowledge_summary(cite_id=cite_id, summary=parsed_result["summary"])
|
|
126
|
+
|
|
127
|
+
if citation_memory:
|
|
128
|
+
sources = ", ".join(
|
|
129
|
+
citation.get("source", "") for citation in citations if citation.get("source")
|
|
130
|
+
)
|
|
131
|
+
metadata_block = {"extracted_sources": citations} if citations else None
|
|
132
|
+
try:
|
|
133
|
+
citation_memory.update_citation(
|
|
134
|
+
cite_id=cite_id,
|
|
135
|
+
content=parsed_result["summary"],
|
|
136
|
+
source=sources or None,
|
|
137
|
+
metadata=metadata_block,
|
|
138
|
+
stage="analysis",
|
|
139
|
+
)
|
|
140
|
+
citation_memory.save()
|
|
141
|
+
except ValueError:
|
|
142
|
+
if verbose:
|
|
143
|
+
print(f"⚠️ cite_id not found in CitationMemory: {cite_id}")
|
|
144
|
+
|
|
145
|
+
processed_details.append(
|
|
146
|
+
{
|
|
147
|
+
"cite_id": cite_id,
|
|
148
|
+
"summary": parsed_result["summary"],
|
|
149
|
+
"citations_count": len(citations),
|
|
150
|
+
}
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
if processed_details and output_dir:
|
|
154
|
+
memory.save()
|
|
155
|
+
|
|
156
|
+
return {
|
|
157
|
+
"success": len(failed_ids) == 0,
|
|
158
|
+
"processed_items": len(processed_details),
|
|
159
|
+
"details": processed_details,
|
|
160
|
+
"failed": failed_ids,
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
def _build_context(
|
|
164
|
+
self, question: str, knowledge_item: KnowledgeItem, memory: InvestigateMemory
|
|
165
|
+
) -> dict[str, Any]:
|
|
166
|
+
"""Build context (pass complete content)"""
|
|
167
|
+
return {
|
|
168
|
+
"question": question,
|
|
169
|
+
"tool_type": knowledge_item.tool_type,
|
|
170
|
+
"query": knowledge_item.query,
|
|
171
|
+
"raw_result": knowledge_item.raw_result, # Complete content
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
def _build_system_prompt(self) -> str:
|
|
175
|
+
"""Build system prompt"""
|
|
176
|
+
prompt = self.get_prompt("system") if self.has_prompts() else None
|
|
177
|
+
if not prompt:
|
|
178
|
+
raise ValueError(
|
|
179
|
+
"NoteAgent missing system prompt, please configure system in prompts/zh/analysis_loop/note_agent.yaml."
|
|
180
|
+
)
|
|
181
|
+
return prompt
|
|
182
|
+
|
|
183
|
+
def _build_user_prompt(self, context: dict[str, Any]) -> str:
|
|
184
|
+
"""Build user prompt (pass complete content)"""
|
|
185
|
+
template = self.get_prompt("user_template") if self.has_prompts() else None
|
|
186
|
+
if not template:
|
|
187
|
+
raise ValueError(
|
|
188
|
+
"NoteAgent missing user prompt template, please configure user_template in prompts/zh/analysis_loop/note_agent.yaml."
|
|
189
|
+
)
|
|
190
|
+
return template.format(**context)
|