realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ DR-in-KG 2.0 Core Data Structures
5
+ Includes: TopicBlock, ToolTrace, DynamicTopicQueue
6
+ """
7
+
8
+ from dataclasses import asdict, dataclass, field
9
+ from datetime import datetime
10
+ from enum import Enum
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+
16
class TopicStatus(Enum):
    """Closed set of states stored in a topic block's ``status`` field"""

    # Values are the lowercase strings written to / read back from serialized blocks.
    PENDING = "pending"  # Waiting to be researched
    RESEARCHING = "researching"  # Research in progress
    COMPLETED = "completed"  # Research finished
    FAILED = "failed"  # Research ended in failure
23
+
24
+
25
class ToolType(Enum):
    """Closed set of tool identifiers; each value is the tool's snake_case string name"""

    RAG_NAIVE = "rag_naive"
    RAG_HYBRID = "rag_hybrid"
    QUERY_ITEM = "query_item"
    PAPER_SEARCH = "paper_search"
    RUN_CODE = "run_code"
    WEB_SEARCH = "web_search"
34
+
35
+
36
# Default cap for ToolTrace.raw_answer (50 * 1024). All size checks in this module use
# len(), so the limit is counted in characters, not encoded bytes.
DEFAULT_RAW_ANSWER_MAX_SIZE = 50 * 1024


@dataclass
class ToolTrace:
    """
    Tool trace - Records complete loop of a single tool call

    ``raw_answer`` is capped at DEFAULT_RAW_ANSWER_MAX_SIZE characters on construction;
    when it is shortened, ``raw_answer_truncated`` is set and
    ``raw_answer_original_size`` keeps the length it had before truncation.
    """

    tool_id: str  # Unique identifier (e.g., "tool_1", "tool_2")
    citation_id: str  # Citation ID (for report citations and anchors, e.g., CIT-1-01)
    tool_type: str  # Tool type (rag_naive, web_search, etc.)
    query: str  # Query statement issued
    raw_answer: str  # Raw detailed result returned by tool (may be truncated)
    summary: str  # Core summary generated by Note Agent
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    raw_answer_truncated: bool = field(default=False)  # Whether raw_answer was truncated
    raw_answer_original_size: int = field(default=0)  # Original size before truncation

    def __post_init__(self):
        """Record the original size (if not supplied) and enforce the default size cap"""
        # 0 means "not provided"; a caller that already truncated passes the real size.
        if self.raw_answer_original_size == 0:
            self.raw_answer_original_size = len(self.raw_answer)

        # Truncate if needed
        if len(self.raw_answer) > DEFAULT_RAW_ANSWER_MAX_SIZE:
            self.raw_answer = self._truncate_raw_answer(
                self.raw_answer, DEFAULT_RAW_ANSWER_MAX_SIZE
            )
            self.raw_answer_truncated = True

    @staticmethod
    def _truncate_raw_answer(raw_answer: str, max_size: int) -> str:
        """
        Truncate raw_answer while trying to preserve valid JSON structure

        Args:
            raw_answer: Original raw answer string
            max_size: Maximum length in characters (compared against len())

        Returns:
            A string no longer than max_size (for max_size >= 0). JSON objects are
            shortened field by field when that is enough; otherwise the text is cut
            and a truncation marker is appended.
        """
        if len(raw_answer) <= max_size:
            return raw_answer

        # Try to parse as JSON and truncate intelligently
        try:
            data = json.loads(raw_answer)

            # If it's a dict with common RAG response fields, truncate content fields
            if isinstance(data, dict):
                half = max_size // 2
                for field_name in ("answer", "content", "text", "chunks", "documents"):
                    value = data.get(field_name)
                    if isinstance(value, str):
                        if len(value) > half:
                            data[field_name] = value[:half] + "... [truncated]"
                    elif isinstance(value, list) and len(value) > 3:
                        # Keep only the first few items; the note is added only when
                        # items were actually dropped.
                        data[field_name] = value[:3] + [
                            {"note": "... additional items truncated"}
                        ]

                truncated = json.dumps(data, ensure_ascii=False)
                if len(truncated) <= max_size:
                    return truncated
        except (ValueError, TypeError, RecursionError):
            # Not JSON (JSONDecodeError is a ValueError) or too deeply nested:
            # fall through to plain-text truncation.
            pass

        # Fallback: simple truncation with marker.
        # NOTE: the marker says "bytes" but the number is a character count.
        truncation_marker = "\n... [content truncated, original size: {} bytes]".format(
            len(raw_answer)
        )
        keep = max_size - len(truncation_marker)
        if keep <= 0:
            # The marker alone would exceed the limit; a negative slice bound would
            # keep almost the whole text, so cut hard instead.
            return raw_answer[: max(max_size, 0)]
        return raw_answer[:keep] + truncation_marker

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary"""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ToolTrace":
        """
        Create from dictionary

        Args:
            data: Mapping of field names to values; it is not modified

        Returns:
            ToolTrace instance
        """
        # Work on a copy so the caller's dict is left untouched.
        values = dict(data)
        # Handle backward compatibility - old data may not have new fields
        values.setdefault("raw_answer_truncated", False)
        if "raw_answer_original_size" not in values:
            values["raw_answer_original_size"] = len(values.get("raw_answer", ""))
        return cls(**values)

    @classmethod
    def create_with_size_limit(
        cls,
        tool_id: str,
        citation_id: str,
        tool_type: str,
        query: str,
        raw_answer: str,
        summary: str,
        max_size: int = DEFAULT_RAW_ANSWER_MAX_SIZE,
    ) -> "ToolTrace":
        """
        Create a ToolTrace with explicit size limit

        Args:
            tool_id: Tool ID
            citation_id: Citation ID
            tool_type: Tool type
            query: Query string
            raw_answer: Raw answer (will be truncated if needed)
            summary: Summary
            max_size: Maximum size for raw_answer; __post_init__ still applies
                DEFAULT_RAW_ANSWER_MAX_SIZE, so larger values have no effect

        Returns:
            ToolTrace instance
        """
        original_size = len(raw_answer)
        truncated = original_size > max_size

        if truncated:
            raw_answer = cls._truncate_raw_answer(raw_answer, max_size)

        return cls(
            tool_id=tool_id,
            citation_id=citation_id,
            tool_type=tool_type,
            query=query,
            raw_answer=raw_answer,
            summary=summary,
            raw_answer_truncated=truncated,
            raw_answer_original_size=original_size,
        )
172
+
173
+
174
@dataclass
class TopicBlock:
    """
    Topic block - the unit stored in the topic queue

    Bundles one sub-topic with its status, the tool traces collected for it and
    creation/update timestamps.
    """

    block_id: str  # Unique identifier (e.g., "block_1", "block_2")
    sub_topic: str  # Sub-topic name
    overview: str  # Topic overview/background
    status: TopicStatus = TopicStatus.PENDING  # Current status
    tool_traces: list[ToolTrace] = field(default_factory=list)  # Tool call traces, oldest first
    iteration_count: int = 0  # Current iteration count
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
    metadata: dict[str, Any] = field(default_factory=dict)  # Additional metadata

    def add_tool_trace(self, trace: ToolTrace) -> None:
        """Append a trace and refresh ``updated_at``"""
        self.tool_traces.append(trace)
        self.updated_at = datetime.now().isoformat()

    def get_latest_trace(self) -> ToolTrace | None:
        """Return the most recently added trace, or None when there are none"""
        if not self.tool_traces:
            return None
        return self.tool_traces[-1]

    def get_all_summaries(self) -> str:
        """Return one "[tool_type] summary" line per trace, joined by newlines"""
        # Joining an empty sequence yields "", which covers the no-trace case.
        return "\n".join(f"[{item.tool_type}] {item.summary}" for item in self.tool_traces)

    def to_dict(self) -> dict[str, Any]:
        """Convert to a plain dictionary (status as its string value, traces as dicts)"""
        payload = asdict(self)
        payload["status"] = self.status.value
        payload["tool_traces"] = [item.to_dict() for item in self.tool_traces]
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TopicBlock":
        """
        Create from dictionary

        Args:
            data: Field mapping; ``status`` may be a string value and
                ``tool_traces`` entries may be dicts or ToolTrace objects

        Returns:
            TopicBlock instance
        """
        kwargs = data.copy()

        raw_status = kwargs.get("status")
        if isinstance(raw_status, str):
            kwargs["status"] = TopicStatus(raw_status)

        if "tool_traces" in kwargs:
            rebuilt = []
            for entry in kwargs["tool_traces"]:
                rebuilt.append(ToolTrace.from_dict(entry) if isinstance(entry, dict) else entry)
            kwargs["tool_traces"] = rebuilt

        return cls(**kwargs)
224
+
225
+
226
class DynamicTopicQueue:
    """
    Dynamic topic queue - Core memory and scheduling center of the system

    Keeps an ordered list of TopicBlock objects, hands out the first pending block
    and records status transitions. When ``state_file`` is set, the queue writes
    itself to that JSON file after each change made through add_block, the mark_*
    methods and set_state_file.
    """

    def __init__(
        self, research_id: str, max_length: int | None = None, state_file: str | None = None
    ):
        """
        Initialize queue

        Args:
            research_id: Research task ID
            max_length: Maximum queue length (None or a non-positive value means unlimited)
            state_file: Auto-persistence file path
        """
        self.research_id = research_id
        self.blocks: list[TopicBlock] = []
        self.block_counter = 0
        self.created_at = datetime.now().isoformat()
        self.max_length = max_length if isinstance(max_length, int) and max_length > 0 else None
        self.state_file = state_file

    def set_state_file(self, filepath: str | None) -> None:
        """Set queue auto-persistence file and save immediately if a path is given"""
        self.state_file = filepath
        self._auto_save()

    @staticmethod
    def _normalize_topic(text: str) -> str:
        """Return text stripped and lower-cased; None/empty becomes an empty string"""
        return (text or "").strip().lower()

    def add_block(self, sub_topic: str, overview: str) -> TopicBlock:
        """
        Add new topic block to the end of queue

        Args:
            sub_topic: Sub-topic name
            overview: Topic overview

        Returns:
            Created TopicBlock

        Raises:
            RuntimeError: If the queue already holds max_length blocks
        """
        if self.max_length and len(self.blocks) >= self.max_length:
            raise RuntimeError(
                f"Queue has reached maximum capacity ({self.max_length}), cannot add new topic."
            )
        self.block_counter += 1
        block = TopicBlock(
            block_id=f"block_{self.block_counter}", sub_topic=sub_topic, overview=overview
        )
        self.blocks.append(block)
        self._auto_save()
        return block

    def has_topic(self, sub_topic: str) -> bool:
        """Check if topic already exists (case-insensitive, ignoring leading/trailing spaces)"""
        target = self._normalize_topic(sub_topic)
        if not target:
            return False
        return any(self._normalize_topic(b.sub_topic) == target for b in self.blocks)

    def list_topics(self) -> list[str]:
        """List all current topic titles"""
        return [b.sub_topic for b in self.blocks]

    def get_pending_block(self) -> TopicBlock | None:
        """
        Get first pending topic block

        Returns:
            First TopicBlock with PENDING status, or None if not found
        """
        return next((b for b in self.blocks if b.status == TopicStatus.PENDING), None)

    def get_block_by_id(self, block_id: str) -> TopicBlock | None:
        """
        Get topic block by ID

        Args:
            block_id: Topic block ID

        Returns:
            Corresponding TopicBlock, or None if not found
        """
        return next((b for b in self.blocks if b.block_id == block_id), None)

    def _set_status(self, block_id: str, status: TopicStatus) -> bool:
        """Set a block's status, refresh its updated_at and auto-save; False if the ID is unknown"""
        block = self.get_block_by_id(block_id)
        if block is None:
            return False
        block.status = status
        block.updated_at = datetime.now().isoformat()
        self._auto_save()
        return True

    def mark_researching(self, block_id: str) -> bool:
        """
        Mark topic block as researching

        Args:
            block_id: Topic block ID

        Returns:
            Whether marking was successful
        """
        return self._set_status(block_id, TopicStatus.RESEARCHING)

    def mark_completed(self, block_id: str) -> bool:
        """
        Mark topic block as completed

        Args:
            block_id: Topic block ID

        Returns:
            Whether marking was successful
        """
        return self._set_status(block_id, TopicStatus.COMPLETED)

    def mark_failed(self, block_id: str) -> bool:
        """
        Mark topic block as failed

        Args:
            block_id: Topic block ID

        Returns:
            Whether marking was successful
        """
        return self._set_status(block_id, TopicStatus.FAILED)

    def get_all_completed_blocks(self) -> list[TopicBlock]:
        """Get all completed topic blocks"""
        return [b for b in self.blocks if b.status == TopicStatus.COMPLETED]

    def get_all_pending_blocks(self) -> list[TopicBlock]:
        """Get all pending topic blocks"""
        return [b for b in self.blocks if b.status == TopicStatus.PENDING]

    def is_all_completed(self) -> bool:
        """Check if all topic blocks are completed (an empty queue counts as not completed)"""
        if not self.blocks:
            return False
        return all(b.status == TopicStatus.COMPLETED for b in self.blocks)

    def get_statistics(self) -> dict[str, Any]:
        """Get queue statistics (block counts per status and total tool calls)"""
        # Single pass over the blocks instead of one scan per status.
        counts = {status: 0 for status in TopicStatus}
        total_tool_calls = 0
        for block in self.blocks:
            counts[block.status] = counts.get(block.status, 0) + 1
            total_tool_calls += len(block.tool_traces)
        return {
            "total_blocks": len(self.blocks),
            "pending": counts[TopicStatus.PENDING],
            "researching": counts[TopicStatus.RESEARCHING],
            "completed": counts[TopicStatus.COMPLETED],
            "failed": counts[TopicStatus.FAILED],
            "total_tool_calls": total_tool_calls,
        }

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary"""
        return {
            "research_id": self.research_id,
            "created_at": self.created_at,
            # Persisted so a restored queue keeps its capacity limit.
            "max_length": self.max_length,
            "blocks": [b.to_dict() for b in self.blocks],
            "statistics": self.get_statistics(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DynamicTopicQueue":
        """
        Create from dictionary

        Args:
            data: Mapping as produced by to_dict; "max_length" is optional so that
                files written without it still load (as an unlimited queue)

        Returns:
            Restored queue; state_file is left unset
        """
        queue = cls(data["research_id"], max_length=data.get("max_length"))
        queue.created_at = data.get("created_at", queue.created_at)
        for block_data in data.get("blocks", []):
            block = TopicBlock.from_dict(block_data)
            queue.blocks.append(block)
            # Keep the counter at the highest numeric suffix seen so that new
            # blocks do not reuse an existing "block_<n>" ID.
            if block.block_id.startswith("block_"):
                try:
                    block_num = int(block.block_id.split("_")[1])
                    queue.block_counter = max(queue.block_counter, block_num)
                except (ValueError, IndexError):
                    pass
        return queue

    def save_to_json(self, filepath: str) -> None:
        """
        Save queue to JSON file

        The queue is serialized in memory first and written to a sibling
        "<name>.tmp" file that then replaces the target, so a serialization
        error or an interrupted write does not destroy the previous file.

        Args:
            filepath: Destination path; parent directories are created as needed
        """
        target = Path(filepath)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Serialize before touching the disk: a non-serializable value raises here
        # and the existing file stays intact.
        payload = json.dumps(self.to_dict(), ensure_ascii=False, indent=2)
        tmp_path = target.with_name(f"{target.name}.tmp")
        try:
            tmp_path.write_text(payload, encoding="utf-8")
            tmp_path.replace(target)
        finally:
            # No-op after a successful replace; removes the leftover on failure.
            tmp_path.unlink(missing_ok=True)

    def _auto_save(self) -> None:
        """Auto-save if state_file is set; failures are reported but never raised"""
        if self.state_file:
            try:
                self.save_to_json(self.state_file)
            except Exception as exc:
                # Best-effort persistence: a failed save must not abort the research run.
                print(f"⚠️ Failed to save queue progress: {exc}")

    @classmethod
    def load_from_json(cls, filepath: str) -> "DynamicTopicQueue":
        """Load queue from JSON file"""
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)
        return cls.from_dict(data)
443
+
444
+
445
# Public names exported by a star-import of this module.
__all__ = ["DynamicTopicQueue", "ToolTrace", "ToolType", "TopicBlock", "TopicStatus"]
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ DR-in-KG 2.0 - Main Entry
5
+ Deep research system based on dynamic topic queue
6
+
7
+ Usage:
8
+ python main.py --topic "Research Topic" [--preset quick/standard/deep]
9
+ """
10
+
11
+ import argparse
12
+ import asyncio
13
+ from pathlib import Path
14
+ import sys
15
+
16
+ from dotenv import load_dotenv
17
+ import yaml
18
+
19
+ from src.agents.research.research_pipeline import ResearchPipeline
20
+ from src.services.llm import get_llm_config
21
+
22
+
23
+ def load_config(config_path: str = None, preset: str = None) -> dict:
24
+ """
25
+ Load configuration file (with main.yaml merge)
26
+
27
+ Args:
28
+ config_path: Configuration file path (default: config/research_config.yaml)
29
+ preset: Preset mode (quick/standard/deep)
30
+
31
+ Returns:
32
+ Configuration dictionary (merged with main.yaml)
33
+ """
34
+ if config_path is None:
35
+ project_root = Path(__file__).parent.parent.parent.parent
36
+ from src.services.config import load_config_with_main
37
+
38
+ config = load_config_with_main("research_config.yaml", project_root)
39
+ else:
40
+ # If custom config path provided, load it directly (for backward compatibility)
41
+ config_file = Path(config_path)
42
+ if not config_file.exists():
43
+ raise FileNotFoundError(f"Configuration file not found: {config_file}")
44
+ with open(config_file, encoding="utf-8") as f:
45
+ config = yaml.safe_load(f) or {}
46
+
47
+ # Apply preset
48
+ if preset and "presets" in config and preset in config["presets"]:
49
+ print(f"✓ Applied preset configuration: {preset}")
50
+ preset_config = config["presets"][preset]
51
+ for key, value in preset_config.items():
52
+ if key in config and isinstance(value, dict):
53
+ config[key].update(value)
54
+
55
+ return config
56
+
57
+
58
def display_config(config: dict):
    """
    Print a summary of the planning, researching and reporting settings

    Args:
        config: Configuration dictionary; missing sections and keys fall back to
            the defaults shown in the output
    """
    rule = "=" * 70
    planning = config.get("planning", {})
    researching = config.get("researching", {})
    reporting = config.get("reporting", {})

    print("\n" + rule)
    print("📋 Current Configuration")
    print(rule)

    print("【Planning Configuration】")
    decompose = planning.get("decompose", {})
    print(f" Initial subtopics: {decompose.get('initial_subtopics', 5)}")
    print(f" Max subtopics: {decompose.get('max_subtopics', 10)}")

    print("\n【Researching Configuration】")
    print(f" Max iterations: {researching.get('max_iterations', 5)}")
    print(f" Research mode: {researching.get('research_mode', 'deep')}")
    print(" Enabled tools:")
    tool_flags = (
        ("RAG", "enable_rag_hybrid"),
        ("Web Search", "enable_web_search"),
        ("Paper Search", "enable_paper_search"),
    )
    for label, flag in tool_flags:
        print(f" - {label}: {researching.get(flag, True)}")

    print("\n【Reporting Configuration】")
    print(f" Min section length: {reporting.get('min_section_length', 500)} characters")
    print(f" Enable topic deduplication: {reporting.get('enable_deduplication', True)}")

    print(rule + "\n")
85
+
86
+
87
async def main():
    """
    Command-line entry point: parse arguments, load configuration and run the pipeline

    Exits the process with status 1 when the LLM configuration or the research
    configuration cannot be loaded or when the research run fails, and with
    status 0 when the run is interrupted by the user.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description="DR-in-KG 2.0 - Deep research system based on dynamic topic queue",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Quick mode
python main.py --topic "Deep Learning Basics" --preset quick

# Standard mode
python main.py --topic "Transformer Architecture" --preset standard

# Deep mode
python main.py --topic "Graph Neural Networks" --preset deep
""",
    )

    parser.add_argument("--topic", type=str, required=True, help="Research topic")

    parser.add_argument(
        "--config",
        type=str,
        default=None,
        help=(
            "Configuration file path (default: ./config.yaml if it exists, "
            "otherwise research_config.yaml merged with main.yaml)"
        ),
    )

    parser.add_argument(
        "--preset",
        type=str,
        choices=["quick", "standard", "deep"],
        help="Preset configuration (quick: fast, standard: standard, deep: deep)",
    )

    parser.add_argument("--output-dir", type=str, help="Output directory (overrides config file)")

    args = parser.parse_args()

    # Load environment variables
    load_dotenv()

    # Check API configuration
    try:
        llm_config = get_llm_config()
    except ValueError as e:
        print(f"✗ Error: {e}")
        print("Please configure in .env or DeepTutor.env file:")
        print(" LLM_MODEL=gpt-4o")
        print(" LLM_API_KEY=your_api_key_here")
        print(" LLM_HOST=https://api.openai.com/v1")
        sys.exit(1)

    # Resolve the configuration source. The previous hard default "config.yaml" made
    # the load_config(None) path (research_config.yaml merged with main.yaml)
    # unreachable and failed whenever ./config.yaml was absent. A local config.yaml
    # is still honoured when present, so existing setups keep working.
    config_path = args.config
    if config_path is None and Path("config.yaml").is_file():
        config_path = "config.yaml"

    # Load configuration
    try:
        config = load_config(config_path, args.preset)
    except Exception as e:
        print(f"✗ Failed to load configuration: {e!s}")
        sys.exit(1)

    # Override configuration (command line arguments take priority)
    if args.output_dir:
        # A custom config file may have no "system" section at all.
        if not isinstance(config.get("system"), dict):
            config["system"] = {}
        config["system"]["output_base_dir"] = args.output_dir
        config["system"]["reports_dir"] = args.output_dir

    # Display configuration
    display_config(config)

    # Create research pipeline
    pipeline = ResearchPipeline(
        config=config, api_key=llm_config.api_key, base_url=llm_config.base_url
    )

    # Execute research
    try:
        result = await pipeline.run(topic=args.topic)

        print("\n" + "=" * 70)
        print("✓ Research completed!")
        print("=" * 70)
        print(f"Research ID: {result['research_id']}")
        print(f"Topic: {result['topic']}")
        print(f"Final Report: {result['final_report_path']}")
        print("=" * 70 + "\n")

    except KeyboardInterrupt:
        # NOTE(review): under asyncio.run on Python 3.11+ Ctrl+C reaches the coroutine
        # as a task cancellation, so this branch may not fire there - confirm the
        # script-level caller also handles KeyboardInterrupt.
        print("\n\n⚠️  Research interrupted by user")
        sys.exit(0)
    except Exception as e:
        print(f"\n\n✗ Research failed: {e!s}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
181
+
182
+
183
if __name__ == "__main__":
    # Windows compatibility
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # On Python 3.11+ asyncio.run() cancels the main task on Ctrl+C and then
        # raises KeyboardInterrupt here, outside the coroutine. Handle it at this
        # level so an interrupt ends with the same message and exit status that
        # main() uses, instead of a traceback.
        print("\n\n⚠️  Research interrupted by user")
        sys.exit(0)