realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Memory System - Memory file system
4
+ Provides implementations of InvestigateMemory, SolveMemory, and CitationMemory
5
+ """
6
+
7
+ from .citation_memory import (
8
+ CitationItem,
9
+ CitationMemory,
10
+ )
11
+ from .investigate_memory import (
12
+ InvestigateMemory,
13
+ KnowledgeItem,
14
+ Reflections,
15
+ )
16
+ from .solve_memory import (
17
+ SolveChainStep,
18
+ SolveMemory,
19
+ ToolCallRecord,
20
+ )
21
+
22
# Names re-exported by this package, grouped by the module that defines them.
__all__ = [
    # investigate_memory
    "InvestigateMemory",
    "KnowledgeItem",
    "Reflections",
    # solve_memory
    "SolveMemory",
    "SolveChainStep",
    "ToolCallRecord",
    # citation_memory
    "CitationMemory",
    "CitationItem",
]
@@ -0,0 +1,353 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ CitationMemory - Global citation management system
4
+ Unified management of citation information generated by all tool calls
5
+ """
6
+
7
+ from dataclasses import asdict, dataclass, field
8
+ from datetime import datetime
9
+ import json
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+
14
+ @dataclass
15
+ class CitationItem:
16
+ """Citation item"""
17
+
18
+ cite_id: str # Citation number, e.g., "[rag-1]"
19
+ tool_type: str # Tool type: rag_naive | rag_hybrid | web_search | query_item | code_execution
20
+ query: str # Query content
21
+ raw_result: str = "" # Raw tool return content (complete)
22
+ source: str = "" # Source information (if any)
23
+ content: str = "" # Citation content summary (usually summary)
24
+ stage: str = "solve" # Stage: analysis | solve
25
+ step_id: str | None = None # Belonging step (applicable to Solve stage)
26
+ metadata: dict[str, Any] = field(default_factory=dict) # Additional metadata
27
+ created_at: str = field(default_factory=lambda: datetime.now().isoformat())
28
+ updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
29
+
30
+ def to_dict(self) -> dict[str, Any]:
31
+ return asdict(self)
32
+
33
+ @classmethod
34
+ def from_dict(cls, data: dict[str, Any]) -> "CitationItem":
35
+ # Compatible with old versions without stage/step_id/metadata fields
36
+ if "stage" not in data:
37
+ data["stage"] = "solve"
38
+ if "metadata" not in data:
39
+ data["metadata"] = {}
40
+ if "updated_at" not in data:
41
+ data["updated_at"] = data.get("created_at", datetime.now().isoformat())
42
+ return cls(**data)
43
+
44
+
45
class CitationMemory:
    """Global citation management system.

    Holds every citation produced by tool calls in one list, hands out
    identifiers of the form ``"[<prefix>-<n>]"`` with one counter per tool
    prefix, and persists the state to ``citation_memory.json`` inside
    ``output_dir``.
    """

    def __init__(self, output_dir: str | None = None):
        """Create an empty memory.

        Args:
            output_dir: Directory that holds ``citation_memory.json``. When it
                is empty or ``None`` no file path is set and ``save`` raises.
        """
        self.output_dir = output_dir
        self.version = "1.1"
        self.created_at = datetime.now().isoformat()
        self.updated_at = datetime.now().isoformat()

        # Core data: citation list
        self.citations: list[CitationItem] = []

        # Last number handed out for each tool prefix
        self.tool_counters: dict[str, int] = {}

        # File path
        self.file_path = Path(output_dir) / "citation_memory.json" if output_dir else None

    @classmethod
    def load_or_create(cls, output_dir: str) -> "CitationMemory":
        """Load ``citation_memory.json`` from ``output_dir`` or start a new memory.

        Args:
            output_dir: Directory that contains (or will contain) the JSON file.

        Returns:
            The loaded memory, or an empty one bound to ``output_dir`` when the
            file does not exist.
        """
        file_path = Path(output_dir) / "citation_memory.json"
        if not file_path.exists():
            return cls(output_dir=output_dir)

        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)

        memory = cls(output_dir=output_dir)
        memory.version = data.get("version", "1.1")
        memory.created_at = data.get("created_at", memory.created_at)
        memory.updated_at = data.get("updated_at", memory.updated_at)
        memory.citations = [CitationItem.from_dict(item) for item in data.get("citations", [])]

        memory.tool_counters = data.get("tool_counters", {})
        if not memory.tool_counters:
            # Files without stored counters: recover the highest number per
            # prefix from the cite_ids that are already present.
            for citation in memory.citations:
                prefix = memory._get_tool_prefix(citation.tool_type)
                number = memory._extract_counter_from_cite_id(citation.cite_id, prefix)
                if number is not None:
                    memory.tool_counters[prefix] = max(
                        memory.tool_counters.get(prefix, 0), number
                    )
        return memory

    def add_citation(
        self,
        tool_type: str,
        query: str,
        raw_result: str = "",
        source: str = "",
        content: str = "",
        stage: str = "solve",
        step_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        cite_id: str | None = None,
    ) -> str:
        """Add a new citation entry.

        Args:
            tool_type: Tool type
            query: Query content
            raw_result: Raw tool return content
            source: Source information
            content: Citation content summary
            stage: Stage the citation belongs to
            step_id: Step the citation belongs to, if any
            metadata: Additional metadata; an empty dict is stored when omitted
            cite_id: Explicit identifier. If an entry with this id already
                exists it is left untouched and the id is returned as is.

        Returns:
            str: Newly generated or reused cite_id (e.g., "[rag-1]")
        """
        # An explicit id that is already registered is simply reused.
        if cite_id and self.get_citation(cite_id):
            return cite_id

        if not cite_id:
            cite_id = self._generate_cite_id(tool_type)

        self.citations.append(
            CitationItem(
                cite_id=cite_id,
                tool_type=tool_type,
                query=query,
                raw_result=raw_result,
                source=source,
                content=content,
                stage=stage,
                step_id=step_id,
                metadata=metadata or {},
            )
        )
        self.updated_at = datetime.now().isoformat()
        return cite_id

    def get_citation(self, cite_id: str) -> CitationItem | None:
        """Return the first citation whose cite_id matches, or ``None``."""
        for citation in self.citations:
            if citation.cite_id == cite_id:
                return citation
        return None

    def get_all_citations(self) -> list[CitationItem]:
        """Return the internal citation list (the list itself, not a copy)."""
        return self.citations

    def get_citations_by_tool_type(self, tool_type: str) -> list[CitationItem]:
        """Return the citations whose ``tool_type`` equals the given value."""
        return [c for c in self.citations if c.tool_type == tool_type]

    def update_citation(
        self,
        cite_id: str,
        raw_result: str | None = None,
        source: str | None = None,
        content: str | None = None,
        metadata: dict[str, Any] | None = None,
        stage: str | None = None,
        step_id: str | None = None,
    ):
        """Update fields of an existing citation entry.

        ``raw_result``, ``source`` and ``content`` are applied whenever they
        are not ``None``. ``metadata`` is merged into the existing metadata,
        and ``stage`` / ``step_id`` replace the stored values, only when they
        are truthy.

        Raises:
            ValueError: If no citation with ``cite_id`` exists.
        """
        citation = self.get_citation(cite_id)
        if citation is None:
            raise ValueError(f"cite_id not found: {cite_id}")

        if raw_result is not None:
            citation.raw_result = raw_result
        if source is not None:
            citation.source = source
        if content is not None:
            citation.content = content
        if metadata:
            citation.metadata.update(metadata)
        if stage:
            citation.stage = stage
        if step_id:
            citation.step_id = step_id
        citation.updated_at = datetime.now().isoformat()
        self.updated_at = datetime.now().isoformat()

    def save(self):
        """Write the memory to its JSON file, creating parent directories.

        Raises:
            ValueError: If the memory was created without an ``output_dir``.
        """
        if not self.file_path:
            raise ValueError("output_dir not set, cannot save")

        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        self.updated_at = datetime.now().isoformat()

        with open(self.file_path, "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, ensure_ascii=False, indent=2)

    def to_dict(self) -> dict[str, Any]:
        """Convert the memory, including all citations, to a dictionary."""
        return {
            "version": self.version,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "citations": [item.to_dict() for item in self.citations],
            "tool_counters": self.tool_counters,
        }

    def format_citations_markdown(
        self, used_cite_ids: list[str] | None = None, language: str = "zh"
    ) -> str:
        """Format the citation list as Markdown.

        Args:
            used_cite_ids: Optional list of citation ids to include. When
                given, only these citations are formatted; otherwise all are.
            language: Language code. It is accepted for compatibility; the
                labels are the same English text for every value.

        Returns:
            str: Markdown-formatted citation list, or "(No citations)" when
            there is nothing to show.
        """
        # The "en" branch and the default branch used identical labels, so a
        # single set is kept.
        # NOTE(review): leading whitespace in these labels and in the indent
        # prefixes below is reproduced as seen — confirm it was not collapsed
        # when this file was extracted.
        no_citations_text = "(No citations)"
        citation_title = "## Citations\n"
        stage_label = "Stage"
        source_label = "Source"
        query_content_label = " - Query Content:"
        citation_content_label = " - Citation Content:"

        if not self.citations:
            return no_citations_text

        citations_to_format = self.citations
        if used_cite_ids is not None:
            wanted = set(used_cite_ids)
            citations_to_format = [c for c in self.citations if c.cite_id in wanted]
            if not citations_to_format:
                return no_citations_text

        # Loop-invariant lookup table: unknown tool types are shown verbatim.
        display_names = {
            "rag_naive": "RAG (Naive)",
            "rag_hybrid": "RAG (Hybrid)",
            "web_search": "Web Search",
            "query_item": "Query Item",
            "code_execution": "Code Execution",
        }

        lines = [citation_title]
        for citation in citations_to_format:
            tool_type_display = display_names.get(citation.tool_type, citation.tool_type)
            # Code-execution entries are fenced as Python; others get no language tag.
            block_language = "python" if citation.tool_type == "code_execution" else None

            line = f"- **{citation.cite_id}** [{tool_type_display}] {stage_label}: {citation.stage}"
            if citation.source:
                line += f" | {source_label}: {citation.source}"
            lines.append(line)

            if citation.query:
                query_block = self._format_block(citation.query, language=block_language)
                lines.append(query_content_label)
                lines.append(self._indent_block(query_block, " "))

            # The summary is preferred; the raw result is the fallback.
            body = citation.content or citation.raw_result
            if body:
                body_block = self._format_block(body, language=block_language, force_block=True)
                lines.append(citation_content_label)
                lines.append(self._indent_block(body_block, " "))

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Internal utility methods
    # ------------------------------------------------------------------
    def _get_tool_prefix(self, tool_type: str) -> str:
        """Map a tool type to its lower-case cite_id prefix.

        Known tool types use a fixed short prefix; any other tool type is used
        as is with underscores replaced by hyphens.
        """
        prefix_map = {
            "rag_naive": "rag",
            "rag_hybrid": "rag",
            "web_search": "web",
            "code_execution": "code",
            "query_item": "query",
        }
        normalized = prefix_map.get(tool_type, tool_type.replace("_", "-"))
        return normalized.lower()

    def _generate_cite_id(self, tool_type: str) -> str:
        """Return the next unused cite_id for the tool's prefix and record it.

        Numbers already taken by stored citations (for example ones added with
        an explicit ``cite_id``) are skipped so that ids stay unique.
        """
        prefix = self._get_tool_prefix(tool_type)
        taken = {citation.cite_id for citation in self.citations}
        current = self.tool_counters.get(prefix, 0) + 1
        while f"[{prefix}-{current}]" in taken:
            current += 1
        self.tool_counters[prefix] = current
        return f"[{prefix}-{current}]"

    @staticmethod
    def _format_block(text: str, language: str | None = None, force_block: bool = False) -> str:
        """Wrap text as inline code or as a fenced code block.

        Surrounding whitespace and backticks are stripped first. A fenced
        block is used when ``force_block`` is set, the text spans several
        lines, or it is longer than 80 characters. Empty text yields "".
        """
        if not text:
            return ""
        clean = text.strip().strip("`")
        if force_block or "\n" in clean or len(clean) > 80:
            return f"```{language or ''}\n{clean}\n```"
        return f"`{clean}`"

    @staticmethod
    def _indent_block(block: str, prefix: str) -> str:
        """Prefix every line of ``block`` with ``prefix`` (empty lines included)."""
        if not block:
            return ""
        return "\n".join(prefix + line if line else prefix for line in block.splitlines())

    @staticmethod
    def _extract_counter_from_cite_id(cite_id: str, prefix: str) -> int | None:
        """Extract the counter from a cite_id such as "[rag-3]".

        Returns ``None`` when the id does not start with ``"<prefix>-"`` or
        when the remainder is not an integer.
        """
        inner = cite_id.strip("[]")
        marker = f"{prefix}-"
        if not inner.startswith(marker):
            return None
        try:
            # Slice off the whole prefix instead of splitting on the first
            # hyphen: prefixes may themselves contain hyphens.
            return int(inner[len(marker):])
        except ValueError:
            return None
@@ -0,0 +1,226 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ InvestigateMemory - Analysis loop memory file system (Refactored: uses unified cite_id)
4
+ """
5
+
6
+ from dataclasses import asdict, dataclass, field
7
+ from datetime import datetime
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+
13
@dataclass
class KnowledgeItem:
    """Knowledge item collected during the analysis loop, identified by cite_id."""

    cite_id: str  # Citation ID, corresponds to cite_id in CitationMemory
    tool_type: str  # rag_naive | rag_hybrid | web_search | query_item
    query: str
    raw_result: str  # Raw tool return content (complete)
    summary: str = ""  # Summary generated by NoteAgent
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dictionary (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "KnowledgeItem":
        """Build an item from a dictionary, accepting old field names.

        The legacy keys ``knowledge_id``, ``source_type``, ``query_text`` and
        ``answer_raw`` are mapped to ``cite_id``, ``tool_type``, ``query`` and
        ``raw_result``. When both the legacy and the current key are present
        the current one wins. A legacy ``citations`` key is dropped.

        Args:
            data: Mapping of field names to values. It is not modified.

        Returns:
            The reconstructed ``KnowledgeItem``.

        Raises:
            TypeError: If a required field is missing or an unknown key remains.
        """
        # Work on a copy so the caller's dictionary keeps its legacy keys.
        values = dict(data)
        legacy_names = (
            ("knowledge_id", "cite_id"),
            ("source_type", "tool_type"),
            ("query_text", "query"),
            ("answer_raw", "raw_result"),
        )
        for old_name, new_name in legacy_names:
            if old_name in values:
                # Always discard the legacy key; it would otherwise be passed
                # to the constructor as an unexpected keyword.
                legacy_value = values.pop(old_name)
                values.setdefault(new_name, legacy_value)
        # Citations are now managed by CitationMemory
        values.pop("citations", None)
        return cls(**values)
44
+
45
+
46
@dataclass
class Reflections:
    """Open questions recorded by the analysis loop."""

    # Questions still needing investigation
    remaining_questions: list[str] = field(default_factory=list)
    # ISO-format timestamp taken when the instance is created
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dictionary."""
        serialized = asdict(self)
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Reflections":
        """Rebuild an instance, passing the dictionary entries as keyword arguments."""
        restored = cls(**data)
        return restored
61
+
62
+
63
class InvestigateMemory:
    """Analysis loop memory management (uses the unified cite_id).

    Stores the knowledge chain and the remaining questions for one task and
    persists them to ``investigate_memory.json`` inside ``output_dir``.
    """

    def __init__(
        self,
        task_id: str | None = None,
        user_question: str = "",
        output_dir: str | None = None,
    ):
        """Create an empty memory.

        Args:
            task_id: Task identifier; a timestamp-based one is generated when
                it is empty or ``None``.
            user_question: The question under investigation.
            output_dir: Directory that holds ``investigate_memory.json``. When
                it is empty or ``None`` no file path is set and ``save`` raises.
        """
        self.task_id = task_id or f"investigate_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        self.user_question = user_question
        self.output_dir = output_dir

        self.version = "3.0"  # Current format version
        self.created_at = datetime.now().isoformat()
        self.updated_at = datetime.now().isoformat()

        # Core data
        self.knowledge_chain: list[KnowledgeItem] = []
        self.reflections: Reflections = Reflections()

        # Metadata (for statistics and monitoring)
        self.metadata: dict[str, Any] = {
            "total_iterations": 0,
            "coverage_rate": 0.0,
            "avg_confidence": 0.0,
            "total_knowledge_items": 0,
        }

        # File path
        self.file_path = Path(output_dir) / "investigate_memory.json" if output_dir else None

    @classmethod
    def load_or_create(
        cls, output_dir: str, user_question: str = "", task_id: str | None = None
    ) -> "InvestigateMemory":
        """Load existing memory or create new memory (migrates v1.0 files).

        Args:
            output_dir: Directory that contains (or will contain) the JSON file.
            user_question: Used for a new memory, and as the fallback when a
                loaded file has no ``user_question``.
            task_id: Used only when a new memory is created.

        Returns:
            The loaded memory, upgraded to version "3.0", or a new empty one.
        """
        file_path = Path(output_dir) / "investigate_memory.json"
        if not file_path.exists():
            return cls(task_id=task_id, user_question=user_question, output_dir=output_dir)

        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)

        memory = cls(
            task_id=data.get("task_id"),
            user_question=data.get("user_question", user_question),
            output_dir=output_dir,
        )

        # A file without a version key is treated as v1.0.
        file_version = data.get("version", "1.0")
        memory.version = "3.0"  # Upgrade to the current version
        memory.created_at = data.get("created_at", memory.created_at)
        memory.updated_at = data.get("updated_at", memory.updated_at)

        # KnowledgeItem.from_dict handles the old field names.
        memory.knowledge_chain = [
            KnowledgeItem.from_dict(item) for item in data.get("knowledge_chain", [])
        ]

        if file_version == "1.0":
            cls._merge_v1_notes(memory.knowledge_chain, data.get("notes", []))
            memory.reflections.remaining_questions = cls._collect_v1_questions(
                data.get("reflections", [])
            )
        else:
            # v2.0/v3.0 format: reflections are stored as one dictionary
            reflections_data = data.get("reflections", {})
            if isinstance(reflections_data, dict):
                memory.reflections = Reflections.from_dict(reflections_data)
            else:
                memory.reflections = Reflections()

        # Load metadata (if exists)
        memory.metadata = data.get("metadata", memory.metadata)
        return memory

    @staticmethod
    def _merge_v1_notes(knowledge_chain: list[Any], notes_data: list[dict[str, Any]]) -> None:
        """Copy v1.0 note summaries onto the knowledge items they reference.

        For each id in a note's ``related_knowledge_ids`` the first item with
        that cite_id receives the note summary, unless it already has one.
        """
        for note in notes_data:
            for knowledge_id in note.get("related_knowledge_ids", []):
                target = next((k for k in knowledge_chain if k.cite_id == knowledge_id), None)
                if target is not None and not target.summary:
                    target.summary = note.get("summary", "")

    @staticmethod
    def _collect_v1_questions(reflections_data: list[dict[str, Any]]) -> list[str]:
        """Flatten v1.0 reflections into a de-duplicated question list.

        Each reflection contributes its ``action_items`` followed by its
        ``gaps``. Duplicates are removed while the first-seen order is kept,
        so the result is the same on every run.
        """
        questions: list[str] = []
        for reflection in reflections_data:
            questions.extend(reflection.get("action_items", []))
            questions.extend(reflection.get("gaps", []))
        # dict.fromkeys keeps insertion order, unlike set().
        return list(dict.fromkeys(questions))

    def add_knowledge(self, item: KnowledgeItem):
        """Append a knowledge item and refresh ``updated_at``."""
        self.knowledge_chain.append(item)
        self.updated_at = datetime.now().isoformat()

    def update_knowledge_summary(self, cite_id: str, summary: str):
        """Update the summary of the first item with ``cite_id`` (called by NoteAgent).

        Raises:
            ValueError: If no knowledge item with ``cite_id`` exists.
        """
        for item in self.knowledge_chain:
            if item.cite_id == cite_id:
                item.summary = summary
                item.updated_at = datetime.now().isoformat()
                self.updated_at = datetime.now().isoformat()
                return
        raise ValueError(f"cite_id not found: {cite_id}")

    def get_available_knowledge(
        self, tool_types: list[str] | None = None, cite_ids: list[str] | None = None
    ) -> list[KnowledgeItem]:
        """Get available knowledge, optionally filtered.

        Args:
            tool_types: Keep only items whose tool type is in this list.
            cite_ids: Keep only items whose cite_id is in this list.

        Returns:
            The matching items. An empty or ``None`` filter is ignored; with
            no active filter the internal list itself is returned.
        """
        results = self.knowledge_chain

        if tool_types:
            results = [k for k in results if k.tool_type in tool_types]

        if cite_ids:
            results = [k for k in results if k.cite_id in cite_ids]

        return results

    def save(self):
        """Write the memory to its JSON file, creating parent directories.

        Raises:
            ValueError: If the memory was created without an ``output_dir``.
        """
        if not self.file_path:
            raise ValueError("output_dir not set, cannot save")

        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        self.updated_at = datetime.now().isoformat()

        with open(self.file_path, "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, ensure_ascii=False, indent=2)

    def to_dict(self) -> dict[str, Any]:
        """Convert the memory, including knowledge chain and reflections, to a dictionary."""
        return {
            "version": self.version,
            "task_id": self.task_id,
            "user_question": self.user_question,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "knowledge_chain": [item.to_dict() for item in self.knowledge_chain],
            "reflections": self.reflections.to_dict(),
            "metadata": self.metadata,
        }