realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Memory System - Memory file system
|
|
4
|
+
Provides implementations of InvestigateMemory, SolveMemory, and CitationMemory
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .citation_memory import (
|
|
8
|
+
CitationItem,
|
|
9
|
+
CitationMemory,
|
|
10
|
+
)
|
|
11
|
+
from .investigate_memory import (
|
|
12
|
+
InvestigateMemory,
|
|
13
|
+
KnowledgeItem,
|
|
14
|
+
Reflections,
|
|
15
|
+
)
|
|
16
|
+
from .solve_memory import (
|
|
17
|
+
SolveChainStep,
|
|
18
|
+
SolveMemory,
|
|
19
|
+
ToolCallRecord,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Public names re-exported by this package, grouped by the module that
# defines them.
__all__ = [
    # Investigate Memory (from .investigate_memory)
    "InvestigateMemory",
    "KnowledgeItem",
    "Reflections",
    # Solve Memory (from .solve_memory)
    "SolveMemory",
    "SolveChainStep",
    "ToolCallRecord",
    # Citation Memory (from .citation_memory)
    "CitationMemory",
    "CitationItem",
]
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
CitationMemory - Global citation management system
|
|
4
|
+
Unified management of citation information generated by all tool calls
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import asdict, dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class CitationItem:
|
|
16
|
+
"""Citation item"""
|
|
17
|
+
|
|
18
|
+
cite_id: str # Citation number, e.g., "[rag-1]"
|
|
19
|
+
tool_type: str # Tool type: rag_naive | rag_hybrid | web_search | query_item | code_execution
|
|
20
|
+
query: str # Query content
|
|
21
|
+
raw_result: str = "" # Raw tool return content (complete)
|
|
22
|
+
source: str = "" # Source information (if any)
|
|
23
|
+
content: str = "" # Citation content summary (usually summary)
|
|
24
|
+
stage: str = "solve" # Stage: analysis | solve
|
|
25
|
+
step_id: str | None = None # Belonging step (applicable to Solve stage)
|
|
26
|
+
metadata: dict[str, Any] = field(default_factory=dict) # Additional metadata
|
|
27
|
+
created_at: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
28
|
+
updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
29
|
+
|
|
30
|
+
def to_dict(self) -> dict[str, Any]:
|
|
31
|
+
return asdict(self)
|
|
32
|
+
|
|
33
|
+
@classmethod
|
|
34
|
+
def from_dict(cls, data: dict[str, Any]) -> "CitationItem":
|
|
35
|
+
# Compatible with old versions without stage/step_id/metadata fields
|
|
36
|
+
if "stage" not in data:
|
|
37
|
+
data["stage"] = "solve"
|
|
38
|
+
if "metadata" not in data:
|
|
39
|
+
data["metadata"] = {}
|
|
40
|
+
if "updated_at" not in data:
|
|
41
|
+
data["updated_at"] = data.get("created_at", datetime.now().isoformat())
|
|
42
|
+
return cls(**data)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class CitationMemory:
|
|
46
|
+
"""Global citation management system"""
|
|
47
|
+
|
|
48
|
+
def __init__(self, output_dir: str | None = None):
|
|
49
|
+
self.output_dir = output_dir
|
|
50
|
+
self.version = "1.1"
|
|
51
|
+
self.created_at = datetime.now().isoformat()
|
|
52
|
+
self.updated_at = datetime.now().isoformat()
|
|
53
|
+
|
|
54
|
+
# Core data: citation list
|
|
55
|
+
self.citations: list[CitationItem] = []
|
|
56
|
+
|
|
57
|
+
# Citation counter (incremented by tool prefix)
|
|
58
|
+
self.tool_counters: dict[str, int] = {}
|
|
59
|
+
|
|
60
|
+
# File path
|
|
61
|
+
if output_dir:
|
|
62
|
+
self.file_path = Path(output_dir) / "citation_memory.json"
|
|
63
|
+
else:
|
|
64
|
+
self.file_path = None
|
|
65
|
+
|
|
66
|
+
@classmethod
|
|
67
|
+
def load_or_create(cls, output_dir: str) -> "CitationMemory":
|
|
68
|
+
"""Load existing citation memory or create new memory"""
|
|
69
|
+
file_path = Path(output_dir) / "citation_memory.json"
|
|
70
|
+
|
|
71
|
+
if file_path.exists():
|
|
72
|
+
# Load existing memory
|
|
73
|
+
with open(file_path, encoding="utf-8") as f:
|
|
74
|
+
data = json.load(f)
|
|
75
|
+
|
|
76
|
+
memory = cls(output_dir=output_dir)
|
|
77
|
+
memory.version = data.get("version", "1.1")
|
|
78
|
+
memory.created_at = data.get("created_at", memory.created_at)
|
|
79
|
+
memory.updated_at = data.get("updated_at", memory.updated_at)
|
|
80
|
+
|
|
81
|
+
# Load citation list
|
|
82
|
+
citations_data = data.get("citations", [])
|
|
83
|
+
memory.citations = [CitationItem.from_dict(item) for item in citations_data]
|
|
84
|
+
|
|
85
|
+
# Restore counters
|
|
86
|
+
memory.tool_counters = data.get("tool_counters", {})
|
|
87
|
+
if not memory.tool_counters:
|
|
88
|
+
# Compatible with old versions: guess from existing cite_id
|
|
89
|
+
for citation in memory.citations:
|
|
90
|
+
prefix = memory._get_tool_prefix(citation.tool_type)
|
|
91
|
+
number = memory._extract_counter_from_cite_id(citation.cite_id, prefix)
|
|
92
|
+
if number is not None:
|
|
93
|
+
memory.tool_counters[prefix] = max(
|
|
94
|
+
memory.tool_counters.get(prefix, 0), number
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
return memory
|
|
98
|
+
# Create new memory
|
|
99
|
+
return cls(output_dir=output_dir)
|
|
100
|
+
|
|
101
|
+
def add_citation(
|
|
102
|
+
self,
|
|
103
|
+
tool_type: str,
|
|
104
|
+
query: str,
|
|
105
|
+
raw_result: str = "",
|
|
106
|
+
source: str = "",
|
|
107
|
+
content: str = "",
|
|
108
|
+
stage: str = "solve",
|
|
109
|
+
step_id: str | None = None,
|
|
110
|
+
metadata: dict[str, Any] | None = None,
|
|
111
|
+
cite_id: str | None = None,
|
|
112
|
+
) -> str:
|
|
113
|
+
"""
|
|
114
|
+
Add new citation entry
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
tool_type: Tool type
|
|
118
|
+
query: Query content
|
|
119
|
+
raw_result: Raw tool return content
|
|
120
|
+
source: Source information
|
|
121
|
+
content: Citation content summary
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
str: Newly generated or reused cite_id (e.g., "[rag-1]")
|
|
125
|
+
"""
|
|
126
|
+
# If cite_id is provided and already exists, return directly
|
|
127
|
+
if cite_id and self.get_citation(cite_id):
|
|
128
|
+
return cite_id
|
|
129
|
+
|
|
130
|
+
# Generate new cite_id
|
|
131
|
+
if not cite_id:
|
|
132
|
+
cite_id = self._generate_cite_id(tool_type)
|
|
133
|
+
|
|
134
|
+
citation = CitationItem(
|
|
135
|
+
cite_id=cite_id,
|
|
136
|
+
tool_type=tool_type,
|
|
137
|
+
query=query,
|
|
138
|
+
raw_result=raw_result,
|
|
139
|
+
source=source,
|
|
140
|
+
content=content,
|
|
141
|
+
stage=stage,
|
|
142
|
+
step_id=step_id,
|
|
143
|
+
metadata=metadata or {},
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
self.citations.append(citation)
|
|
147
|
+
self.updated_at = datetime.now().isoformat()
|
|
148
|
+
|
|
149
|
+
return cite_id
|
|
150
|
+
|
|
151
|
+
def get_citation(self, cite_id: str) -> CitationItem | None:
|
|
152
|
+
"""Get citation entry by cite_id"""
|
|
153
|
+
for citation in self.citations:
|
|
154
|
+
if citation.cite_id == cite_id:
|
|
155
|
+
return citation
|
|
156
|
+
return None
|
|
157
|
+
|
|
158
|
+
def get_all_citations(self) -> list[CitationItem]:
|
|
159
|
+
"""Get all citation entries"""
|
|
160
|
+
return self.citations
|
|
161
|
+
|
|
162
|
+
def get_citations_by_tool_type(self, tool_type: str) -> list[CitationItem]:
|
|
163
|
+
"""Get citation entries by tool type"""
|
|
164
|
+
return [c for c in self.citations if c.tool_type == tool_type]
|
|
165
|
+
|
|
166
|
+
def update_citation(
|
|
167
|
+
self,
|
|
168
|
+
cite_id: str,
|
|
169
|
+
raw_result: str | None = None,
|
|
170
|
+
source: str | None = None,
|
|
171
|
+
content: str | None = None,
|
|
172
|
+
metadata: dict[str, Any] | None = None,
|
|
173
|
+
stage: str | None = None,
|
|
174
|
+
step_id: str | None = None,
|
|
175
|
+
):
|
|
176
|
+
"""Update citation entry information"""
|
|
177
|
+
for citation in self.citations:
|
|
178
|
+
if citation.cite_id == cite_id:
|
|
179
|
+
if raw_result is not None:
|
|
180
|
+
citation.raw_result = raw_result
|
|
181
|
+
if source is not None:
|
|
182
|
+
citation.source = source
|
|
183
|
+
if content is not None:
|
|
184
|
+
citation.content = content
|
|
185
|
+
if metadata:
|
|
186
|
+
citation.metadata.update(metadata)
|
|
187
|
+
if stage:
|
|
188
|
+
citation.stage = stage
|
|
189
|
+
if step_id:
|
|
190
|
+
citation.step_id = step_id
|
|
191
|
+
citation.updated_at = datetime.now().isoformat()
|
|
192
|
+
self.updated_at = datetime.now().isoformat()
|
|
193
|
+
return
|
|
194
|
+
raise ValueError(f"cite_id not found: {cite_id}")
|
|
195
|
+
|
|
196
|
+
def save(self):
|
|
197
|
+
"""Save to JSON file"""
|
|
198
|
+
if not self.file_path:
|
|
199
|
+
raise ValueError("output_dir not set, cannot save")
|
|
200
|
+
|
|
201
|
+
# Ensure directory exists
|
|
202
|
+
self.file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
203
|
+
|
|
204
|
+
self.updated_at = datetime.now().isoformat()
|
|
205
|
+
|
|
206
|
+
# Convert to dictionary
|
|
207
|
+
data = self.to_dict()
|
|
208
|
+
|
|
209
|
+
# Save to file
|
|
210
|
+
with open(self.file_path, "w", encoding="utf-8") as f:
|
|
211
|
+
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
212
|
+
|
|
213
|
+
def to_dict(self) -> dict[str, Any]:
|
|
214
|
+
"""Convert to dictionary"""
|
|
215
|
+
return {
|
|
216
|
+
"version": self.version,
|
|
217
|
+
"created_at": self.created_at,
|
|
218
|
+
"updated_at": self.updated_at,
|
|
219
|
+
"citations": [item.to_dict() for item in self.citations],
|
|
220
|
+
"tool_counters": self.tool_counters,
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
def format_citations_markdown(
|
|
224
|
+
self, used_cite_ids: list[str] | None = None, language: str = "zh"
|
|
225
|
+
) -> str:
|
|
226
|
+
"""
|
|
227
|
+
Format citation list as Markdown
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
used_cite_ids: Optional, specify citation ID list to format. If provided, only format these citations; if not provided, format all citations.
|
|
231
|
+
language: Language code, 'zh' for Chinese, 'en' for English, default 'zh'
|
|
232
|
+
|
|
233
|
+
Returns:
|
|
234
|
+
str: Markdown-formatted citation list
|
|
235
|
+
"""
|
|
236
|
+
# Set text based on language
|
|
237
|
+
if language == "en":
|
|
238
|
+
no_citations_text = "(No citations)"
|
|
239
|
+
citation_title = "## Citations\n"
|
|
240
|
+
stage_label = "Stage"
|
|
241
|
+
source_label = "Source"
|
|
242
|
+
query_content_label = " - Query Content:"
|
|
243
|
+
citation_content_label = " - Citation Content:"
|
|
244
|
+
else:
|
|
245
|
+
no_citations_text = "(No citations)"
|
|
246
|
+
citation_title = "## Citations\n"
|
|
247
|
+
stage_label = "Stage"
|
|
248
|
+
source_label = "Source"
|
|
249
|
+
query_content_label = " - Query Content:"
|
|
250
|
+
citation_content_label = " - Citation Content:"
|
|
251
|
+
|
|
252
|
+
if not self.citations:
|
|
253
|
+
return no_citations_text
|
|
254
|
+
|
|
255
|
+
# If used_cite_ids is specified, only format these citations
|
|
256
|
+
citations_to_format = self.citations
|
|
257
|
+
if used_cite_ids is not None:
|
|
258
|
+
cite_id_set = set(used_cite_ids)
|
|
259
|
+
citations_to_format = [c for c in self.citations if c.cite_id in cite_id_set]
|
|
260
|
+
if not citations_to_format:
|
|
261
|
+
return no_citations_text
|
|
262
|
+
|
|
263
|
+
lines = [citation_title]
|
|
264
|
+
|
|
265
|
+
for citation in citations_to_format:
|
|
266
|
+
tool_type_display = {
|
|
267
|
+
"rag_naive": "RAG (Naive)",
|
|
268
|
+
"rag_hybrid": "RAG (Hybrid)",
|
|
269
|
+
"web_search": "Web Search",
|
|
270
|
+
"query_item": "Query Item",
|
|
271
|
+
"code_execution": "Code Execution",
|
|
272
|
+
}.get(citation.tool_type, citation.tool_type)
|
|
273
|
+
|
|
274
|
+
line = f"- **{citation.cite_id}** [{tool_type_display}] {stage_label}: {citation.stage}"
|
|
275
|
+
if citation.source:
|
|
276
|
+
line += f" | {source_label}: {citation.source}"
|
|
277
|
+
lines.append(line)
|
|
278
|
+
|
|
279
|
+
if citation.query:
|
|
280
|
+
query_block = self._format_block(
|
|
281
|
+
citation.query,
|
|
282
|
+
language="python" if citation.tool_type == "code_execution" else None,
|
|
283
|
+
)
|
|
284
|
+
lines.append(query_content_label)
|
|
285
|
+
lines.append(self._indent_block(query_block, " "))
|
|
286
|
+
|
|
287
|
+
if citation.content:
|
|
288
|
+
content_block = self._format_block(
|
|
289
|
+
citation.content,
|
|
290
|
+
language="python" if citation.tool_type == "code_execution" else None,
|
|
291
|
+
force_block=True,
|
|
292
|
+
)
|
|
293
|
+
lines.append(citation_content_label)
|
|
294
|
+
lines.append(self._indent_block(content_block, " "))
|
|
295
|
+
elif citation.raw_result:
|
|
296
|
+
raw_block = self._format_block(
|
|
297
|
+
citation.raw_result,
|
|
298
|
+
language="python" if citation.tool_type == "code_execution" else None,
|
|
299
|
+
force_block=True,
|
|
300
|
+
)
|
|
301
|
+
lines.append(citation_content_label)
|
|
302
|
+
lines.append(self._indent_block(raw_block, " "))
|
|
303
|
+
|
|
304
|
+
return "\n".join(lines)
|
|
305
|
+
|
|
306
|
+
# ------------------------------------------------------------------
|
|
307
|
+
# Internal utility methods
|
|
308
|
+
# ------------------------------------------------------------------
|
|
309
|
+
def _get_tool_prefix(self, tool_type: str) -> str:
|
|
310
|
+
"""Generate unified prefix based on tool type"""
|
|
311
|
+
prefix_map = {
|
|
312
|
+
"rag_naive": "rag",
|
|
313
|
+
"rag_hybrid": "rag",
|
|
314
|
+
"web_search": "web",
|
|
315
|
+
"code_execution": "code",
|
|
316
|
+
"query_item": "query",
|
|
317
|
+
}
|
|
318
|
+
normalized = prefix_map.get(tool_type, tool_type.replace("_", "-"))
|
|
319
|
+
return normalized.lower()
|
|
320
|
+
|
|
321
|
+
def _generate_cite_id(self, tool_type: str) -> str:
|
|
322
|
+
prefix = self._get_tool_prefix(tool_type)
|
|
323
|
+
current = self.tool_counters.get(prefix, 0) + 1
|
|
324
|
+
self.tool_counters[prefix] = current
|
|
325
|
+
return f"[{prefix}-{current}]"
|
|
326
|
+
|
|
327
|
+
@staticmethod
|
|
328
|
+
def _format_block(text: str, language: str | None = None, force_block: bool = False) -> str:
|
|
329
|
+
if not text:
|
|
330
|
+
return ""
|
|
331
|
+
clean = text.strip()
|
|
332
|
+
clean = clean.strip("`")
|
|
333
|
+
if force_block or "\n" in clean or len(clean) > 80:
|
|
334
|
+
lang = language or ""
|
|
335
|
+
return f"```{lang}\n{clean}\n```"
|
|
336
|
+
return f"`{clean}`"
|
|
337
|
+
|
|
338
|
+
@staticmethod
|
|
339
|
+
def _indent_block(block: str, prefix: str) -> str:
|
|
340
|
+
if not block:
|
|
341
|
+
return ""
|
|
342
|
+
return "\n".join(prefix + line if line else prefix for line in block.splitlines())
|
|
343
|
+
|
|
344
|
+
@staticmethod
|
|
345
|
+
def _extract_counter_from_cite_id(cite_id: str, prefix: str) -> int | None:
|
|
346
|
+
"""Extract counter value from cite_id"""
|
|
347
|
+
try:
|
|
348
|
+
inner = cite_id.strip("[]")
|
|
349
|
+
if inner.startswith(f"{prefix}-"):
|
|
350
|
+
return int(inner.split("-", maxsplit=1)[1])
|
|
351
|
+
except (ValueError, IndexError):
|
|
352
|
+
return None
|
|
353
|
+
return None
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
InvestigateMemory - Analysis loop memory file system (Refactored: uses unified cite_id)
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import asdict, dataclass, field
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class KnowledgeItem:
    """Knowledge item (Refactored: uses cite_id).

    One entry of the investigation knowledge chain: the tool call that was
    made, its raw result, and an optional summary.
    """

    cite_id: str  # Citation ID, corresponds to cite_id in CitationMemory
    tool_type: str  # rag_naive | rag_hybrid | web_search | query_item
    query: str
    raw_result: str  # Raw tool return content (complete)
    summary: str = ""  # Summary generated by NoteAgent
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Return the item as a dictionary."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "KnowledgeItem":
        """Build an item from a dictionary, accepting old field names.

        Legacy keys (``knowledge_id``, ``source_type``, ``query_text``,
        ``answer_raw``) are renamed to their current equivalents; when both
        the legacy and the current key are present the current one wins.
        A legacy ``citations`` entry is discarded.

        Args:
            data: Mapping of field names to values. It is not modified.

        Returns:
            The reconstructed ``KnowledgeItem``.

        Raises:
            TypeError: If a required field is missing or an unknown key
                remains (raised by the dataclass constructor).
        """
        # Work on a shallow copy so the caller's dictionary is left untouched.
        payload = dict(data)

        # Backward compatibility: handle old version field names
        legacy_names = (
            ("knowledge_id", "cite_id"),
            ("source_type", "tool_type"),
            ("query_text", "query"),
            ("answer_raw", "raw_result"),
        )
        for old_key, new_key in legacy_names:
            if old_key in payload:
                # Always drop the legacy key; only fill the new one if absent.
                payload.setdefault(new_key, payload.pop(old_key))

        # Remove old citations field (now managed by CitationMemory)
        payload.pop("citations", None)
        return cls(**payload)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class Reflections:
    """Open questions recorded during the analysis loop."""

    # Questions still needing investigation
    remaining_questions: list[str] = field(default_factory=list)
    # ISO-8601 timestamp taken when the instance is created
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dictionary."""
        serialized = asdict(self)
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Reflections":
        """Build an instance from a dictionary of field values."""
        restored = cls(**data)
        return restored
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class InvestigateMemory:
|
|
64
|
+
"""Analysis loop memory management (Refactored: uses unified cite_id)"""
|
|
65
|
+
|
|
66
|
+
def __init__(
|
|
67
|
+
self,
|
|
68
|
+
task_id: str | None = None,
|
|
69
|
+
user_question: str = "",
|
|
70
|
+
output_dir: str | None = None,
|
|
71
|
+
):
|
|
72
|
+
self.task_id = task_id or f"investigate_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
73
|
+
self.user_question = user_question
|
|
74
|
+
self.output_dir = output_dir
|
|
75
|
+
|
|
76
|
+
self.version = "3.0" # New version
|
|
77
|
+
self.created_at = datetime.now().isoformat()
|
|
78
|
+
self.updated_at = datetime.now().isoformat()
|
|
79
|
+
|
|
80
|
+
# Core data
|
|
81
|
+
self.knowledge_chain: list[KnowledgeItem] = []
|
|
82
|
+
self.reflections: Reflections = Reflections()
|
|
83
|
+
|
|
84
|
+
# Metadata (for statistics and monitoring)
|
|
85
|
+
self.metadata: dict[str, Any] = {
|
|
86
|
+
"total_iterations": 0,
|
|
87
|
+
"coverage_rate": 0.0,
|
|
88
|
+
"avg_confidence": 0.0,
|
|
89
|
+
"total_knowledge_items": 0,
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
# File path
|
|
93
|
+
if output_dir:
|
|
94
|
+
self.file_path = Path(output_dir) / "investigate_memory.json"
|
|
95
|
+
else:
|
|
96
|
+
self.file_path = None
|
|
97
|
+
|
|
98
|
+
@classmethod
def load_or_create(
    cls, output_dir: str, user_question: str = "", task_id: str | None = None
) -> "InvestigateMemory":
    """Load existing memory or create new memory (supports v1.0/v2.0 backward compatibility)

    Looks for ``investigate_memory.json`` inside ``output_dir``. When the
    file is absent a fresh memory is returned. When it is present its
    content is loaded and the in-memory version is set to ``"3.0"``.

    Args:
        output_dir: Directory expected to contain the memory file.
        user_question: Used for a new memory, or when the stored file has
            no ``user_question`` key.
        task_id: Used for a new memory, or when the stored file has no
            (or an empty) ``task_id``.

    Returns:
        The loaded or newly created memory.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON.
    """
    file_path = Path(output_dir) / "investigate_memory.json"

    if not file_path.exists():
        # Create new memory
        return cls(task_id=task_id, user_question=user_question, output_dir=output_dir)

    # Load existing memory
    with open(file_path, encoding="utf-8") as f:
        data = json.load(f)

    memory = cls(
        # Fall back to the caller's task_id (like user_question does) before
        # letting __init__ generate a timestamp-based identifier.
        task_id=data.get("task_id") or task_id,
        user_question=data.get("user_question", user_question),
        output_dir=output_dir,
    )

    # Files without a version key are treated as v1.0
    file_version = data.get("version", "1.0")
    memory.version = "3.0"  # Upgrade to new version
    memory.created_at = data.get("created_at", memory.created_at)
    memory.updated_at = data.get("updated_at", memory.updated_at)

    # Load knowledge chain (supports v1.0/v2.0 compatibility)
    memory.knowledge_chain = [
        KnowledgeItem.from_dict(item) for item in data.get("knowledge_chain", [])
    ]

    if file_version == "1.0":
        # v1.0 migration, step 1: merge each note's summary into the first
        # knowledge item it references, unless that item already has one.
        for note in data.get("notes", []):
            for knowledge_id in note.get("related_knowledge_ids", []):
                for k_item in memory.knowledge_chain:
                    # Old version uses knowledge_id, new version uses cite_id
                    if k_item.cite_id == knowledge_id:
                        if not k_item.summary:
                            k_item.summary = note.get("summary", "")
                        break

        # v1.0 migration, step 2: flatten every reflection's action items
        # and gaps into the list of remaining questions.
        remaining_questions = []
        for reflection in data.get("reflections", []):
            remaining_questions.extend(reflection.get("action_items", []))
            remaining_questions.extend(reflection.get("gaps", []))

        # Deduplicate while keeping first-seen order; a plain set() would
        # scramble the order and make it vary between runs.
        memory.reflections.remaining_questions = list(dict.fromkeys(remaining_questions))
    else:
        # v2.0/v3.0 format
        reflections_data = data.get("reflections", {})
        if isinstance(reflections_data, dict):
            memory.reflections = Reflections.from_dict(reflections_data)
        else:
            memory.reflections = Reflections()

    # Load metadata (if exists)
    memory.metadata = data.get("metadata", memory.metadata)

    return memory
|
|
168
|
+
|
|
169
|
+
def add_knowledge(self, item: KnowledgeItem):
    """Append ``item`` to the end of the knowledge chain and refresh ``updated_at``."""
    chain = self.knowledge_chain
    chain.append(item)
    # Record the modification time as an ISO-8601 string
    stamp = datetime.now().isoformat()
    self.updated_at = stamp
|
|
173
|
+
|
|
174
|
+
def update_knowledge_summary(self, cite_id: str, summary: str):
    """Replace the summary of a knowledge item (called by NoteAgent)

    Only the first item in the knowledge chain whose ``cite_id`` matches is
    updated; its ``updated_at`` and the memory's ``updated_at`` are refreshed.

    Args:
        cite_id: Identifier of the knowledge item to update.
        summary: New summary text.

    Raises:
        ValueError: If no item in the knowledge chain has this ``cite_id``.
    """
    target = next(
        (entry for entry in self.knowledge_chain if entry.cite_id == cite_id),
        None,
    )
    if target is None:
        raise ValueError(f"cite_id not found: {cite_id}")

    target.summary = summary
    target.updated_at = datetime.now().isoformat()
    self.updated_at = datetime.now().isoformat()
|
|
183
|
+
|
|
184
|
+
def get_available_knowledge(
    self, tool_types: list[str] | None = None, cite_ids: list[str] | None = None
) -> list[KnowledgeItem]:
    """Get available knowledge (supports filtering)

    Args:
        tool_types: When non-empty, keep only items whose ``tool_type`` is
            in this list.
        cite_ids: When non-empty, keep only items whose ``cite_id`` is in
            this list.

    Returns:
        The matching items in chain order. When neither filter is active
        (``None`` or empty) the knowledge chain list itself is returned,
        not a copy.
    """
    selected = self.knowledge_chain

    # Apply each active filter in turn; a falsy filter is skipped entirely.
    for attr_name, allowed in (("tool_type", tool_types), ("cite_id", cite_ids)):
        if allowed:
            selected = [entry for entry in selected if getattr(entry, attr_name) in allowed]

    return selected
|
|
197
|
+
|
|
198
|
+
def save(self):
    """Save to JSON file

    Refreshes ``updated_at``, serialises ``self.to_dict()`` and writes it to
    ``self.file_path`` as UTF-8 JSON, creating the parent directory if
    needed.

    The data is first written to a sibling ``<name>.tmp`` file which is then
    renamed over the target. A serialisation error or a crash mid-write
    therefore leaves any previously saved file intact instead of truncated.

    Raises:
        ValueError: If no file path is configured (``output_dir`` not set).
        TypeError: If the data contains values ``json`` cannot serialise.
    """
    if not self.file_path:
        raise ValueError("output_dir not set, cannot save")

    # Ensure directory exists
    self.file_path.parent.mkdir(parents=True, exist_ok=True)

    self.updated_at = datetime.now().isoformat()

    # Convert to dictionary
    data = self.to_dict()

    # Write to a temporary sibling, then rename it over the target
    tmp_path = self.file_path.with_name(self.file_path.name + ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        tmp_path.replace(self.file_path)
    finally:
        # No-op after a successful rename; removes the leftover on failure
        tmp_path.unlink(missing_ok=True)
|
|
214
|
+
|
|
215
|
+
def to_dict(self) -> dict[str, Any]:
    """Convert to dictionary

    Returns:
        A dictionary with the scalar attributes copied as-is, every
        knowledge item and the reflections converted through their own
        ``to_dict()``, and ``metadata`` included by reference.
    """
    # Plain attributes are copied straight across, in output order
    payload: dict[str, Any] = {
        attr_name: getattr(self, attr_name)
        for attr_name in ("version", "task_id", "user_question", "created_at", "updated_at")
    }

    # Nested objects serialise themselves
    serialized_chain = []
    for entry in self.knowledge_chain:
        serialized_chain.append(entry.to_dict())
    payload["knowledge_chain"] = serialized_chain
    payload["reflections"] = self.reflections.to_dict()
    payload["metadata"] = self.metadata
    return payload
|