realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,536 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Run Code Tool - Code execution tool
4
+ Execute Python code in isolated workspace, preserving original input/output structure.
5
+ """
6
+
7
+ import ast
8
+ import asyncio
9
+ from contextlib import contextmanager
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ import os
13
+ from pathlib import Path
14
+ import subprocess
15
+ import sys
16
+ import tempfile
17
+ import time
18
+ from typing import Any
19
+
20
+ RUN_CODE_WORKSPACE_ENV = "RUN_CODE_WORKSPACE"
21
+ RUN_CODE_ALLOWED_ROOTS_ENV = "RUN_CODE_ALLOWED_ROOTS"
22
+ DEFAULT_WORKSPACE_NAME = "run_code_workspace"
23
+ PROJECT_ROOT = Path(__file__).resolve().parents[2]
24
+
25
+ from src.logging import get_logger
26
+
27
+ logger = get_logger("CodeExecutor")
28
+
29
+
30
def _load_config() -> dict[str, Any]:
    """Load the ``tools.run_code`` configuration section.

    Sources are tried in order and the first non-empty section wins:

    1. ``solve_config.yaml`` loaded through ``load_config_with_main``
    2. ``question_config.yaml`` loaded through ``load_config_with_main``
    3. ``config/main.yaml`` under ``PROJECT_ROOT``, read directly with PyYAML

    Every failure is logged at debug level and treated as "not configured",
    so a missing or broken config file never prevents the module from loading.

    Returns:
        The ``run_code`` section, or an empty dict when no source provides one.
    """
    try:
        from src.services.config import load_config_with_main
    except ImportError:
        logger.debug("config_loader not available, using fallback")
    else:
        # The most common use case (solve) is tried first, then question.
        for config_file in ("solve_config.yaml", "question_config.yaml"):
            try:
                config = load_config_with_main(config_file, PROJECT_ROOT)
                run_code_config = config.get("tools", {}).get("run_code", {})
                if run_code_config:
                    logger.debug(
                        f"Loaded run_code config from {config_file} (with main.yaml)"
                    )
                    return run_code_config
            except Exception as e:
                logger.debug(f"Failed to load from {config_file.removesuffix('.yaml')}: {e}")

    # Last resort: read main.yaml directly, without the config service.
    try:
        import yaml

        main_config_path = PROJECT_ROOT / "config" / "main.yaml"
        if main_config_path.exists():
            with open(main_config_path, encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
            run_code_config = config.get("tools", {}).get("run_code", {})
            if run_code_config:
                logger.debug("Loaded run_code config from main.yaml")
                return run_code_config
    except Exception as e:
        logger.debug(f"Failed to load from main.yaml: {e}")

    return {}
83
+
84
+
85
class CodeExecutionError(Exception):
    """Error raised by the run-code tool, e.g. for invalid syntax or a disallowed import."""
87
+
88
+
89
def _operation_timestamp() -> str:
    """Return the current local time as an ISO-8601 string."""
    return datetime.now().isoformat()


@dataclass
class OperationEntry:
    """One recorded operation: what happened, its details, and when it was logged."""

    # Short name of the operation.
    action: str
    # Free-form payload describing the operation.
    details: dict[str, Any]
    # Creation time; filled in automatically when not supplied.
    timestamp: str = field(default_factory=_operation_timestamp)
94
+
95
+
96
class OperationLogger:
    """Bounded in-memory record of recent operations.

    Each call to :meth:`log` appends one entry; when the record grows past
    ``max_entries`` the oldest entry is dropped.
    """

    def __init__(self, max_entries: int = 200):
        self._history: list[OperationEntry] = []
        self._max_entries = max_entries

    def log(self, action: str, details: dict[str, Any]):
        """Record ``action`` with its ``details`` and emit a debug log line."""
        self._history.append(OperationEntry(action=action, details=details))
        over_capacity = len(self._history) > self._max_entries
        if over_capacity:
            # Drop the oldest entry so the record stays bounded.
            del self._history[0]
        logger.debug(f"Operation logged: {action} | details={details.get('status')}")

    @property
    def history(self) -> list[OperationEntry]:
        """A shallow copy of the recorded entries, oldest first."""
        return self._history.copy()
113
+
114
+
115
+ class WorkspaceManager:
116
+ """Manages isolated workspace, similar to code_implementation_server workspace logic"""
117
+
118
+ def __init__(self):
119
+ # Load configuration (priority: environment variable > config file > default)
120
+ config = _load_config()
121
+
122
+ # Determine workspace directory (priority: environment variable > config file > default)
123
+ env_path = os.getenv(RUN_CODE_WORKSPACE_ENV)
124
+ if env_path:
125
+ self.base_dir = Path(env_path).expanduser().resolve()
126
+ else:
127
+ config_workspace = config.get("workspace")
128
+ if config_workspace:
129
+ # Support relative paths (relative to project root) and absolute paths
130
+ workspace_path = Path(config_workspace).expanduser()
131
+ if workspace_path.is_absolute():
132
+ self.base_dir = workspace_path.resolve()
133
+ else:
134
+ self.base_dir = (PROJECT_ROOT / workspace_path).resolve()
135
+ else:
136
+ # Default workspace is set under user directory
137
+ self.base_dir = (PROJECT_ROOT / "data" / "user" / DEFAULT_WORKSPACE_NAME).resolve()
138
+
139
+ # Determine allowed root paths list
140
+ # Default includes project root and user directory
141
+ self.allowed_roots: list[Path] = [
142
+ PROJECT_ROOT.resolve(),
143
+ (PROJECT_ROOT / "data" / "user").resolve(),
144
+ ]
145
+
146
+ # Read allowed root paths from config file
147
+ config_allowed_roots = config.get("allowed_roots", [])
148
+ if isinstance(config_allowed_roots, str):
149
+ config_allowed_roots = [config_allowed_roots]
150
+ for root_path in config_allowed_roots:
151
+ root = Path(root_path).expanduser()
152
+ if root.is_absolute():
153
+ resolved_root = root.resolve()
154
+ else:
155
+ resolved_root = (PROJECT_ROOT / root).resolve()
156
+ # Avoid duplicate addition
157
+ if resolved_root not in self.allowed_roots:
158
+ self.allowed_roots.append(resolved_root)
159
+
160
+ # Read additional allowed root paths from environment variables
161
+ extra_roots = os.getenv(RUN_CODE_ALLOWED_ROOTS_ENV)
162
+ if extra_roots:
163
+ for raw_path in extra_roots.split(os.pathsep):
164
+ raw_path = raw_path.strip()
165
+ if raw_path:
166
+ path = Path(raw_path).expanduser()
167
+ if path.is_absolute():
168
+ resolved_path = path.resolve()
169
+ else:
170
+ resolved_path = (PROJECT_ROOT / path).resolve()
171
+ # Avoid duplicate addition
172
+ if resolved_path not in self.allowed_roots:
173
+ self.allowed_roots.append(resolved_path)
174
+
175
+ # Ensure workspace directory itself can also be a valid root path
176
+ if self.base_dir not in self.allowed_roots:
177
+ self.allowed_roots.append(self.base_dir)
178
+
179
+ self._initialized = False
180
+
181
+ def initialize(self):
182
+ if not self._initialized:
183
+ self.base_dir.mkdir(parents=True, exist_ok=True)
184
+ self._initialized = True
185
+ logger.info(f"Run-code workspace initialized at {self.base_dir}")
186
+
187
+ def ensure_initialized(self):
188
+ if not self._initialized:
189
+ self.initialize()
190
+
191
+ @contextmanager
192
+ def create_temp_dir(self) -> Path:
193
+ self.ensure_initialized()
194
+ with tempfile.TemporaryDirectory(dir=self.base_dir) as temp_dir:
195
+ yield Path(temp_dir)
196
+
197
+ def resolve_assets_dir(self, assets_dir: str | None) -> Path | None:
198
+ if not assets_dir:
199
+ return None
200
+ path = Path(assets_dir).expanduser()
201
+ if not path.is_absolute():
202
+ path = (self.base_dir / path).resolve()
203
+ self._ensure_within_allowed_roots(path)
204
+ path.mkdir(parents=True, exist_ok=True)
205
+ return path
206
+
207
+ def collect_artifacts(self, assets_dir: Path | None) -> tuple[list[str], list[str]]:
208
+ artifacts: list[str] = []
209
+ artifact_paths: list[str] = []
210
+ if not assets_dir or not assets_dir.exists():
211
+ return artifacts, artifact_paths
212
+
213
+ for file_path in assets_dir.iterdir():
214
+ if file_path.is_file() and file_path.name != ".gitkeep":
215
+ artifacts.append(str(file_path.relative_to(assets_dir)))
216
+ artifact_paths.append(str(file_path.resolve()))
217
+ return artifacts, artifact_paths
218
+
219
+ def _ensure_within_allowed_roots(self, path: Path):
220
+ resolved_path = path.resolve()
221
+ for root in self.allowed_roots:
222
+ # Use Path object methods for path comparison, avoiding Windows path separator and case issues
223
+ try:
224
+ # Python 3.9+ use is_relative_to
225
+ if hasattr(resolved_path, "is_relative_to"):
226
+ if resolved_path.is_relative_to(root):
227
+ return
228
+ else:
229
+ # Python < 3.9 use path comparison after resolve()
230
+ # Convert to lowercase and normalize path separators for comparison (Windows compatible)
231
+ resolved_str = str(resolved_path).lower().replace("\\", "/")
232
+ root_str = str(root.resolve()).lower().replace("\\", "/")
233
+ if resolved_str.startswith(root_str):
234
+ return
235
+ except (ValueError, AttributeError):
236
+ # If is_relative_to fails, fallback to string comparison
237
+ resolved_str = str(resolved_path).lower().replace("\\", "/")
238
+ root_str = str(root.resolve()).lower().replace("\\", "/")
239
+ if resolved_str.startswith(root_str):
240
+ return
241
+ allowed = "\n".join(str(root) for root in self.allowed_roots)
242
+ raise ValueError(
243
+ f"Assets directory {resolved_path} must be located under one of the following allowed paths:\n{allowed}"
244
+ )
245
+
246
+
247
+ class ImportGuard:
248
+ """Parse AST, restrict import modules, ensure consistency with allowed_imports logic"""
249
+
250
+ @staticmethod
251
+ def validate(code: str, allowed_imports: list[str] | None):
252
+ if not allowed_imports:
253
+ return
254
+
255
+ allowed = set(allowed_imports)
256
+ try:
257
+ tree = ast.parse(code)
258
+ except SyntaxError as exc:
259
+ raise CodeExecutionError(f"Code syntax error: {exc}") from exc
260
+
261
+ imported: list[str] = []
262
+ for node in ast.walk(tree):
263
+ if isinstance(node, ast.Import):
264
+ for alias in node.names:
265
+ imported.append(alias.name.split(".")[0])
266
+ elif isinstance(node, ast.ImportFrom):
267
+ if node.module:
268
+ imported.append(node.module.split(".")[0])
269
+
270
+ unauthorized = sorted({name for name in imported if name not in allowed})
271
+ if unauthorized:
272
+ raise CodeExecutionError(
273
+ f"The following modules are not in the allowed list: {', '.join(unauthorized)}"
274
+ )
275
+
276
+
277
+ class CodeExecutionEnvironment:
278
+ """Encapsulates actual code execution logic, maintaining consistency with DeepCode server flow"""
279
+
280
+ def __init__(self, workspace: WorkspaceManager):
281
+ self.workspace = workspace
282
+
283
+ def run_python(
284
+ self,
285
+ code: str,
286
+ timeout: int,
287
+ assets_dir: Path | None,
288
+ ) -> tuple[str, str, int, float]:
289
+ env = os.environ.copy()
290
+ env["PYTHONIOENCODING"] = "utf-8"
291
+
292
+ with self.workspace.create_temp_dir() as temp_dir:
293
+ code_file = temp_dir / "code.py"
294
+ code_file.write_text(code, encoding="utf-8")
295
+
296
+ work_dir = assets_dir if assets_dir else temp_dir
297
+ start_time = time.time()
298
+
299
+ result = subprocess.run(
300
+ [sys.executable, str(code_file)],
301
+ check=False,
302
+ capture_output=True,
303
+ text=True,
304
+ encoding="utf-8",
305
+ errors="replace",
306
+ timeout=timeout,
307
+ cwd=str(work_dir),
308
+ env=env,
309
+ )
310
+
311
+ elapsed_ms = (time.time() - start_time) * 1000
312
+ return result.stdout, result.stderr, result.returncode, elapsed_ms
313
+
314
+
315
+ WORKSPACE_MANAGER = WorkspaceManager()
316
+ OPERATION_LOGGER = OperationLogger()
317
+ EXECUTION_ENV = CodeExecutionEnvironment(WORKSPACE_MANAGER)
318
+
319
+
320
+ async def run_code(
321
+ language: str,
322
+ code: str,
323
+ timeout: int = 10,
324
+ assets_dir: str | None = None,
325
+ allowed_imports: list[str] | None = None,
326
+ ) -> dict[str, Any]:
327
+ """
328
+ Execute code in isolated environment, return result structure consistent with previous version.
329
+ """
330
+ if language.lower() != "python":
331
+ raise ValueError(f"Unsupported language: {language}, currently only Python is supported")
332
+
333
+ WORKSPACE_MANAGER.ensure_initialized()
334
+ ImportGuard.validate(code, allowed_imports)
335
+
336
+ assets_path = WORKSPACE_MANAGER.resolve_assets_dir(assets_dir)
337
+ loop = asyncio.get_running_loop()
338
+
339
+ def _execute():
340
+ return EXECUTION_ENV.run_python(code, timeout, assets_path)
341
+
342
+ try:
343
+ stdout, stderr, exit_code, elapsed_ms = await loop.run_in_executor(None, _execute)
344
+ artifacts, artifact_paths = WORKSPACE_MANAGER.collect_artifacts(assets_path)
345
+
346
+ result = {
347
+ "stdout": stdout,
348
+ "stderr": stderr,
349
+ "artifacts": artifacts,
350
+ "artifact_paths": artifact_paths,
351
+ "exit_code": exit_code,
352
+ "elapsed_ms": elapsed_ms,
353
+ }
354
+
355
+ OPERATION_LOGGER.log(
356
+ "execute_python",
357
+ {
358
+ "status": "success",
359
+ "language": language,
360
+ "timeout": timeout,
361
+ "assets_dir": str(assets_path) if assets_path else None,
362
+ "exit_code": exit_code,
363
+ "elapsed_ms": elapsed_ms,
364
+ "code_size": len(code),
365
+ },
366
+ )
367
+
368
+ return result
369
+
370
+ except subprocess.TimeoutExpired as timeout_exc:
371
+ # Code execution timeout
372
+ artifacts, artifact_paths = WORKSPACE_MANAGER.collect_artifacts(assets_path)
373
+ elapsed_ms = timeout * 1000
374
+ message = f"Code execution timeout ({timeout} seconds)"
375
+
376
+ logger.warning(f"Code execution timeout after {timeout}s: {timeout_exc}")
377
+
378
+ OPERATION_LOGGER.log(
379
+ "execute_python",
380
+ {
381
+ "status": "timeout",
382
+ "language": language,
383
+ "timeout": timeout,
384
+ "assets_dir": str(assets_path) if assets_path else None,
385
+ },
386
+ )
387
+
388
+ return {
389
+ "stdout": "",
390
+ "stderr": message,
391
+ "artifacts": artifacts,
392
+ "artifact_paths": artifact_paths,
393
+ "exit_code": -1,
394
+ "elapsed_ms": elapsed_ms,
395
+ }
396
+
397
+ except Exception as exc: # pylint: disable=broad-except
398
+ # Catch all other exceptions to ensure main flow is not interrupted
399
+ artifacts, artifact_paths = WORKSPACE_MANAGER.collect_artifacts(assets_path)
400
+ elapsed_ms = 0.0
401
+
402
+ if isinstance(exc, CodeExecutionError):
403
+ stderr_message = str(exc)
404
+ else:
405
+ stderr_message = f"Code execution failed: {exc}"
406
+
407
+ # Log detailed error information
408
+ logger.error(f"Code execution error: {exc}", exc_info=True)
409
+
410
+ OPERATION_LOGGER.log(
411
+ "execute_python",
412
+ {
413
+ "status": "error",
414
+ "language": language,
415
+ "timeout": timeout,
416
+ "assets_dir": str(assets_path) if assets_path else None,
417
+ "error": stderr_message,
418
+ },
419
+ )
420
+
421
+ return {
422
+ "stdout": "",
423
+ "stderr": stderr_message,
424
+ "artifacts": artifacts,
425
+ "artifact_paths": artifact_paths,
426
+ "exit_code": -1,
427
+ "elapsed_ms": elapsed_ms,
428
+ }
429
+
430
+
431
+ def run_code_sync(
432
+ language: str,
433
+ code: str,
434
+ timeout: int = 10,
435
+ assets_dir: str | None = None,
436
+ ) -> dict[str, Any]:
437
+ """
438
+ Synchronous version of code execution (for non-async environments)
439
+ """
440
+
441
+ return asyncio.run(run_code(language, code, timeout, assets_dir))
442
+
443
+
444
if __name__ == "__main__":
    import textwrap

    # Manual smoke test: runs six small snippets through run_code and prints
    # what comes back. Nothing here executes on import.

    def _show(result, with_artifacts=False):
        """Print the fields of one run_code result."""
        print("stdout:", result["stdout"])
        print("stderr:", result["stderr"])
        if with_artifacts:
            print("artifacts:", result.get("artifacts", {}))
            print("artifact_paths:", result.get("artifact_paths", []))
        print("exit_code:", result["exit_code"])

    async def _demo():
        divider = "-" * 40

        print("==== 1. Test normal output ====")
        hello_src = "print('Hello from run_code workspace!')"
        _show(await run_code("python", hello_src, timeout=5), with_artifacts=True)
        print(divider)

        print("==== 2. Test exception case ====")
        failing_src = "raise ValueError('Test error from run_code!')"
        _show(await run_code("python", failing_src, timeout=5))
        print(divider)

        print("==== 3. Test code timeout ====")
        slow_src = textwrap.dedent(
            """
            import time
            time.sleep(10)
            print("Timeout should occur before this prints.")
            """
        )
        _show(await run_code("python", slow_src, timeout=2))
        print(divider)

        print("==== 4. Test plotting functionality (matplotlib) ====")
        plot_src = textwrap.dedent(
            """
            import matplotlib.pyplot as plt
            plt.figure()
            plt.plot([1, 2, 3], [4, 2, 5])
            plt.title('Simple Plot')
            plt.savefig('test_plot.png')
            print('Plot created!')
            """
        )
        plot_result = await run_code("python", plot_src, timeout=5)
        _show(plot_result, with_artifacts=True)
        # Report whether any image files were picked up as artifacts.
        if plot_result.get("artifact_paths"):
            print("Generated image files:", plot_result["artifact_paths"])
        else:
            print("No image files found.")
        print(divider)

        print("==== 5. Test standard input ====")
        stdin_src = textwrap.dedent(
            """
            text = input("Please enter content: ")
            print("You entered: ", text)
            """
        )
        # run_code does not feed stdin to the child; this case only shows
        # what the output looks like in that situation.
        _show(await run_code("python", stdin_src, timeout=5))
        print(divider)

        print("==== 6. Test multi-file and resource read/write ====")
        file_io_src = textwrap.dedent(
            """
            with open('test_file.txt', 'w', encoding='utf-8') as f:
                f.write('Fake data for test!\\nAnother line.')
            with open('test_file.txt', 'r', encoding='utf-8') as f:
                content = f.read()
            print('File content:', content)
            """
        )
        _show(await run_code("python", file_io_src, timeout=5), with_artifacts=True)
        print(divider)

    asyncio.run(_demo())
@@ -0,0 +1,171 @@
1
+ """
2
+ Paper Search Tool - ArXiv paper search tool
3
+
4
+ Features:
5
+ 1. Search ArXiv papers
6
+ 2. Parse paper metadata
7
+ 3. Format paper information
8
+
9
+ Author: DeepTutor Team
10
+ Version: v1.0
11
+ Based on: TODO.md specification
12
+ """
13
+
14
+ import asyncio
15
+ from datetime import datetime
16
+ import re
17
+
18
+ import arxiv
19
+
20
+
21
+ class PaperSearchTool:
22
+ """ArXiv paper search tool"""
23
+
24
+ def __init__(self):
25
+ """Initialize search tool"""
26
+ self.client = arxiv.Client()
27
+
28
+ async def search_papers(
29
+ self,
30
+ query: str,
31
+ max_results: int = 3,
32
+ years_limit: int | None = 3,
33
+ sort_by: str = "relevance",
34
+ ) -> list[dict]:
35
+ """
36
+ Search ArXiv papers
37
+
38
+ Args:
39
+ query: Search query keywords
40
+ max_results: Number of papers to return
41
+ years_limit: Paper year limit (last N years), None means no limit
42
+ sort_by: Sort method - "relevance" or "date"
43
+
44
+ Returns:
45
+ List of papers, each paper contains:
46
+ - title: Title
47
+ - authors: Author list
48
+ - year: Publication year
49
+ - abstract: Abstract
50
+ - url: Paper URL
51
+ - arxiv_id: ArXiv ID
52
+ - published: Publication date (ISO format)
53
+ """
54
+ # Determine sort method
55
+ if sort_by == "date":
56
+ sort_criterion = arxiv.SortCriterion.SubmittedDate
57
+ else:
58
+ sort_criterion = arxiv.SortCriterion.Relevance
59
+
60
+ # Build search object
61
+ search = arxiv.Search(
62
+ query=query,
63
+ max_results=max_results * 3, # Search more for filtering
64
+ sort_by=sort_criterion,
65
+ sort_order=arxiv.SortOrder.Descending,
66
+ )
67
+
68
+ papers = []
69
+ current_year = datetime.now().year
70
+
71
+ # Execute search asynchronously (arxiv library is synchronous, but we can run in executor)
72
+ results = list(self.client.results(search))
73
+
74
+ for result in results:
75
+ # Extract year
76
+ published_date = result.published
77
+ paper_year = published_date.year
78
+
79
+ # Year filtering
80
+ if years_limit and (current_year - paper_year) > years_limit:
81
+ continue
82
+
83
+ # Extract ArXiv ID
84
+ arxiv_id = result.entry_id.split("/")[-1]
85
+ if "v" in arxiv_id:
86
+ arxiv_id = arxiv_id.split("v")[0] # Remove version number
87
+
88
+ # Extract authors
89
+ authors = [author.name for author in result.authors]
90
+
91
+ # Build paper information
92
+ paper_info = {
93
+ "title": result.title,
94
+ "authors": authors,
95
+ "year": paper_year,
96
+ "abstract": result.summary,
97
+ "url": result.entry_id,
98
+ "arxiv_id": arxiv_id,
99
+ "published": published_date.isoformat(),
100
+ }
101
+
102
+ papers.append(paper_info)
103
+
104
+ # If enough collected, stop
105
+ if len(papers) >= max_results:
106
+ break
107
+
108
+ return papers
109
+
110
+ def format_paper_citation(self, paper: dict) -> str:
111
+ """
112
+ Format paper citation
113
+
114
+ Args:
115
+ paper: Paper information dictionary
116
+
117
+ Returns:
118
+ Citation string: (FirstAuthor et al., Year)
119
+ """
120
+ if not paper["authors"]:
121
+ return f"(Unknown, {paper['year']})"
122
+
123
+ first_author = paper["authors"][0].split()[-1] # Extract surname
124
+
125
+ if len(paper["authors"]) > 1:
126
+ return f"({first_author} et al., {paper['year']})"
127
+ return f"({first_author}, {paper['year']})"
128
+
129
+ def extract_arxiv_id_from_url(self, url: str) -> str | None:
130
+ """
131
+ Extract ArXiv ID from URL
132
+
133
+ Args:
134
+ url: ArXiv URL
135
+
136
+ Returns:
137
+ ArXiv ID or None
138
+ """
139
+ match = re.search(r"arxiv\.org/(?:abs|pdf)/(\d+\.\d+)", url)
140
+ if match:
141
+ return match.group(1)
142
+ return None
143
+
144
+
145
+ # ========== Usage Example ==========
146
+
147
+
148
async def main():
    """Run one sample arXiv search and print a short summary of every hit."""
    tool = PaperSearchTool()

    # Sample query used to exercise the search end to end
    print("Search: transformer attention mechanism")
    papers = await tool.search_papers(
        query="transformer attention mechanism",
        max_results=3,
        years_limit=3,
        sort_by="relevance",
    )

    print(f"\nFound {len(papers)} papers:\n")

    # Number the hits from 1 and list at most the first three authors of each
    for i, paper in enumerate(papers, start=1):
        print(f"{i}. {paper['title']}")
        print(f" Authors: {', '.join(paper['authors'][:3])}")
        print(f" Year: {paper['year']}")
        print(f" Citation: {tool.format_paper_citation(paper)}")
        print(f" URL: {paper['url']}")
        print(f" ArXiv ID: {paper['arxiv_id']}")
        print()
168
+
169
+
170
# Run the sample search only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())