realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Run Code Tool - Code execution tool
|
|
4
|
+
Execute Python code in isolated workspace, preserving original input/output structure.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import ast
|
|
8
|
+
import asyncio
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
import os
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
import subprocess
|
|
15
|
+
import sys
|
|
16
|
+
import tempfile
|
|
17
|
+
import time
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
# Name of the environment variable that overrides the workspace directory.
RUN_CODE_WORKSPACE_ENV = "RUN_CODE_WORKSPACE"
# Name of the environment variable listing extra allowed roots
# (entries are separated by os.pathsep).
RUN_CODE_ALLOWED_ROOTS_ENV = "RUN_CODE_ALLOWED_ROOTS"
# Directory name of the default workspace, created under <PROJECT_ROOT>/data/user.
DEFAULT_WORKSPACE_NAME = "run_code_workspace"
# Two levels above the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
|
24
|
+
|
|
25
|
+
from src.logging import get_logger
|
|
26
|
+
|
|
27
|
+
logger = get_logger("CodeExecutor")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _load_config() -> dict[str, Any]:
    """Load the ``tools.run_code`` configuration section.

    Sources are tried in order and the first non-empty section wins:

    1. ``solve_config.yaml`` through ``load_config_with_main``
    2. ``question_config.yaml`` through ``load_config_with_main``
    3. ``<PROJECT_ROOT>/config/main.yaml`` read directly with PyYAML

    Every failure is logged at debug level and treated as "not configured",
    so this function never raises because of a missing or broken config file.

    Returns:
        The ``run_code`` mapping, or an empty dict when no source provides one.
    """

    def _extract(config: Any) -> dict[str, Any]:
        # Tolerate ``tools:`` / ``run_code:`` being null or not a mapping
        # instead of failing with AttributeError on ``None.get``.
        tools = config.get("tools") if isinstance(config, dict) else None
        section = tools.get("run_code") if isinstance(tools, dict) else None
        return section if isinstance(section, dict) else {}

    try:
        from src.services.config import load_config_with_main
    except ImportError:
        logger.debug("config_loader not available, using fallback")
    else:
        # Each module config is attempted exactly once, most common first.
        for stem in ("solve_config", "question_config"):
            try:
                section = _extract(load_config_with_main(f"{stem}.yaml", PROJECT_ROOT))
            except Exception as e:
                logger.debug(f"Failed to load from {stem}: {e}")
                continue
            if section:
                logger.debug(f"Loaded run_code config from {stem}.yaml (with main.yaml)")
                return section

    # Last resort: read main.yaml directly, without the project config loader.
    try:
        import yaml

        main_config_path = PROJECT_ROOT / "config" / "main.yaml"
        if main_config_path.exists():
            with open(main_config_path, encoding="utf-8") as f:
                section = _extract(yaml.safe_load(f))
            if section:
                logger.debug("Loaded run_code config from main.yaml")
                return section
    except Exception as e:
        logger.debug(f"Failed to load from main.yaml: {e}")

    return {}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class CodeExecutionError(Exception):
    """Signals that submitted code was rejected or could not be executed.

    ``ImportGuard.validate`` raises it for syntax errors and for imports that
    are not on the allowed list.
    """
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class OperationEntry:
    """A single record in the operation history."""

    # Operation name, e.g. "execute_python".
    action: str
    # Free-form payload describing the operation.
    details: dict[str, Any]
    # Creation time as a naive local-time ISO 8601 string; filled in
    # automatically when not supplied.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class OperationLogger:
    """Bounded, in-memory history of operations (oldest entries are evicted first)."""

    def __init__(self, max_entries: int = 200):
        # Cap on the number of retained entries.
        self._max_entries = max_entries
        self._history: list[OperationEntry] = []

    def log(self, action: str, details: dict[str, Any]):
        """Record one operation and emit a debug line with its ``status`` value.

        Args:
            action: Operation name stored on the entry.
            details: Payload stored on the entry; only ``details["status"]``
                appears in the debug log line.
        """
        self._history.append(OperationEntry(action=action, details=details))
        # One entry is added per call, so evicting a single entry keeps the cap.
        if len(self._history) > self._max_entries:
            del self._history[0]
        logger.debug(f"Operation logged: {action} | details={details.get('status')}")

    @property
    def history(self) -> "list[OperationEntry]":
        """Return a shallow copy of the retained entries, oldest first."""
        return self._history[:]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class WorkspaceManager:
|
|
116
|
+
"""Manages isolated workspace, similar to code_implementation_server workspace logic"""
|
|
117
|
+
|
|
118
|
+
def __init__(self):
|
|
119
|
+
# Load configuration (priority: environment variable > config file > default)
|
|
120
|
+
config = _load_config()
|
|
121
|
+
|
|
122
|
+
# Determine workspace directory (priority: environment variable > config file > default)
|
|
123
|
+
env_path = os.getenv(RUN_CODE_WORKSPACE_ENV)
|
|
124
|
+
if env_path:
|
|
125
|
+
self.base_dir = Path(env_path).expanduser().resolve()
|
|
126
|
+
else:
|
|
127
|
+
config_workspace = config.get("workspace")
|
|
128
|
+
if config_workspace:
|
|
129
|
+
# Support relative paths (relative to project root) and absolute paths
|
|
130
|
+
workspace_path = Path(config_workspace).expanduser()
|
|
131
|
+
if workspace_path.is_absolute():
|
|
132
|
+
self.base_dir = workspace_path.resolve()
|
|
133
|
+
else:
|
|
134
|
+
self.base_dir = (PROJECT_ROOT / workspace_path).resolve()
|
|
135
|
+
else:
|
|
136
|
+
# Default workspace is set under user directory
|
|
137
|
+
self.base_dir = (PROJECT_ROOT / "data" / "user" / DEFAULT_WORKSPACE_NAME).resolve()
|
|
138
|
+
|
|
139
|
+
# Determine allowed root paths list
|
|
140
|
+
# Default includes project root and user directory
|
|
141
|
+
self.allowed_roots: list[Path] = [
|
|
142
|
+
PROJECT_ROOT.resolve(),
|
|
143
|
+
(PROJECT_ROOT / "data" / "user").resolve(),
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
# Read allowed root paths from config file
|
|
147
|
+
config_allowed_roots = config.get("allowed_roots", [])
|
|
148
|
+
if isinstance(config_allowed_roots, str):
|
|
149
|
+
config_allowed_roots = [config_allowed_roots]
|
|
150
|
+
for root_path in config_allowed_roots:
|
|
151
|
+
root = Path(root_path).expanduser()
|
|
152
|
+
if root.is_absolute():
|
|
153
|
+
resolved_root = root.resolve()
|
|
154
|
+
else:
|
|
155
|
+
resolved_root = (PROJECT_ROOT / root).resolve()
|
|
156
|
+
# Avoid duplicate addition
|
|
157
|
+
if resolved_root not in self.allowed_roots:
|
|
158
|
+
self.allowed_roots.append(resolved_root)
|
|
159
|
+
|
|
160
|
+
# Read additional allowed root paths from environment variables
|
|
161
|
+
extra_roots = os.getenv(RUN_CODE_ALLOWED_ROOTS_ENV)
|
|
162
|
+
if extra_roots:
|
|
163
|
+
for raw_path in extra_roots.split(os.pathsep):
|
|
164
|
+
raw_path = raw_path.strip()
|
|
165
|
+
if raw_path:
|
|
166
|
+
path = Path(raw_path).expanduser()
|
|
167
|
+
if path.is_absolute():
|
|
168
|
+
resolved_path = path.resolve()
|
|
169
|
+
else:
|
|
170
|
+
resolved_path = (PROJECT_ROOT / path).resolve()
|
|
171
|
+
# Avoid duplicate addition
|
|
172
|
+
if resolved_path not in self.allowed_roots:
|
|
173
|
+
self.allowed_roots.append(resolved_path)
|
|
174
|
+
|
|
175
|
+
# Ensure workspace directory itself can also be a valid root path
|
|
176
|
+
if self.base_dir not in self.allowed_roots:
|
|
177
|
+
self.allowed_roots.append(self.base_dir)
|
|
178
|
+
|
|
179
|
+
self._initialized = False
|
|
180
|
+
|
|
181
|
+
def initialize(self):
|
|
182
|
+
if not self._initialized:
|
|
183
|
+
self.base_dir.mkdir(parents=True, exist_ok=True)
|
|
184
|
+
self._initialized = True
|
|
185
|
+
logger.info(f"Run-code workspace initialized at {self.base_dir}")
|
|
186
|
+
|
|
187
|
+
def ensure_initialized(self):
|
|
188
|
+
if not self._initialized:
|
|
189
|
+
self.initialize()
|
|
190
|
+
|
|
191
|
+
@contextmanager
|
|
192
|
+
def create_temp_dir(self) -> Path:
|
|
193
|
+
self.ensure_initialized()
|
|
194
|
+
with tempfile.TemporaryDirectory(dir=self.base_dir) as temp_dir:
|
|
195
|
+
yield Path(temp_dir)
|
|
196
|
+
|
|
197
|
+
def resolve_assets_dir(self, assets_dir: str | None) -> Path | None:
|
|
198
|
+
if not assets_dir:
|
|
199
|
+
return None
|
|
200
|
+
path = Path(assets_dir).expanduser()
|
|
201
|
+
if not path.is_absolute():
|
|
202
|
+
path = (self.base_dir / path).resolve()
|
|
203
|
+
self._ensure_within_allowed_roots(path)
|
|
204
|
+
path.mkdir(parents=True, exist_ok=True)
|
|
205
|
+
return path
|
|
206
|
+
|
|
207
|
+
def collect_artifacts(self, assets_dir: Path | None) -> tuple[list[str], list[str]]:
|
|
208
|
+
artifacts: list[str] = []
|
|
209
|
+
artifact_paths: list[str] = []
|
|
210
|
+
if not assets_dir or not assets_dir.exists():
|
|
211
|
+
return artifacts, artifact_paths
|
|
212
|
+
|
|
213
|
+
for file_path in assets_dir.iterdir():
|
|
214
|
+
if file_path.is_file() and file_path.name != ".gitkeep":
|
|
215
|
+
artifacts.append(str(file_path.relative_to(assets_dir)))
|
|
216
|
+
artifact_paths.append(str(file_path.resolve()))
|
|
217
|
+
return artifacts, artifact_paths
|
|
218
|
+
|
|
219
|
+
def _ensure_within_allowed_roots(self, path: Path):
|
|
220
|
+
resolved_path = path.resolve()
|
|
221
|
+
for root in self.allowed_roots:
|
|
222
|
+
# Use Path object methods for path comparison, avoiding Windows path separator and case issues
|
|
223
|
+
try:
|
|
224
|
+
# Python 3.9+ use is_relative_to
|
|
225
|
+
if hasattr(resolved_path, "is_relative_to"):
|
|
226
|
+
if resolved_path.is_relative_to(root):
|
|
227
|
+
return
|
|
228
|
+
else:
|
|
229
|
+
# Python < 3.9 use path comparison after resolve()
|
|
230
|
+
# Convert to lowercase and normalize path separators for comparison (Windows compatible)
|
|
231
|
+
resolved_str = str(resolved_path).lower().replace("\\", "/")
|
|
232
|
+
root_str = str(root.resolve()).lower().replace("\\", "/")
|
|
233
|
+
if resolved_str.startswith(root_str):
|
|
234
|
+
return
|
|
235
|
+
except (ValueError, AttributeError):
|
|
236
|
+
# If is_relative_to fails, fallback to string comparison
|
|
237
|
+
resolved_str = str(resolved_path).lower().replace("\\", "/")
|
|
238
|
+
root_str = str(root.resolve()).lower().replace("\\", "/")
|
|
239
|
+
if resolved_str.startswith(root_str):
|
|
240
|
+
return
|
|
241
|
+
allowed = "\n".join(str(root) for root in self.allowed_roots)
|
|
242
|
+
raise ValueError(
|
|
243
|
+
f"Assets directory {resolved_path} must be located under one of the following allowed paths:\n{allowed}"
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class ImportGuard:
|
|
248
|
+
"""Parse AST, restrict import modules, ensure consistency with allowed_imports logic"""
|
|
249
|
+
|
|
250
|
+
@staticmethod
|
|
251
|
+
def validate(code: str, allowed_imports: list[str] | None):
|
|
252
|
+
if not allowed_imports:
|
|
253
|
+
return
|
|
254
|
+
|
|
255
|
+
allowed = set(allowed_imports)
|
|
256
|
+
try:
|
|
257
|
+
tree = ast.parse(code)
|
|
258
|
+
except SyntaxError as exc:
|
|
259
|
+
raise CodeExecutionError(f"Code syntax error: {exc}") from exc
|
|
260
|
+
|
|
261
|
+
imported: list[str] = []
|
|
262
|
+
for node in ast.walk(tree):
|
|
263
|
+
if isinstance(node, ast.Import):
|
|
264
|
+
for alias in node.names:
|
|
265
|
+
imported.append(alias.name.split(".")[0])
|
|
266
|
+
elif isinstance(node, ast.ImportFrom):
|
|
267
|
+
if node.module:
|
|
268
|
+
imported.append(node.module.split(".")[0])
|
|
269
|
+
|
|
270
|
+
unauthorized = sorted({name for name in imported if name not in allowed})
|
|
271
|
+
if unauthorized:
|
|
272
|
+
raise CodeExecutionError(
|
|
273
|
+
f"The following modules are not in the allowed list: {', '.join(unauthorized)}"
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class CodeExecutionEnvironment:
|
|
278
|
+
"""Encapsulates actual code execution logic, maintaining consistency with DeepCode server flow"""
|
|
279
|
+
|
|
280
|
+
def __init__(self, workspace: WorkspaceManager):
|
|
281
|
+
self.workspace = workspace
|
|
282
|
+
|
|
283
|
+
def run_python(
|
|
284
|
+
self,
|
|
285
|
+
code: str,
|
|
286
|
+
timeout: int,
|
|
287
|
+
assets_dir: Path | None,
|
|
288
|
+
) -> tuple[str, str, int, float]:
|
|
289
|
+
env = os.environ.copy()
|
|
290
|
+
env["PYTHONIOENCODING"] = "utf-8"
|
|
291
|
+
|
|
292
|
+
with self.workspace.create_temp_dir() as temp_dir:
|
|
293
|
+
code_file = temp_dir / "code.py"
|
|
294
|
+
code_file.write_text(code, encoding="utf-8")
|
|
295
|
+
|
|
296
|
+
work_dir = assets_dir if assets_dir else temp_dir
|
|
297
|
+
start_time = time.time()
|
|
298
|
+
|
|
299
|
+
result = subprocess.run(
|
|
300
|
+
[sys.executable, str(code_file)],
|
|
301
|
+
check=False,
|
|
302
|
+
capture_output=True,
|
|
303
|
+
text=True,
|
|
304
|
+
encoding="utf-8",
|
|
305
|
+
errors="replace",
|
|
306
|
+
timeout=timeout,
|
|
307
|
+
cwd=str(work_dir),
|
|
308
|
+
env=env,
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
elapsed_ms = (time.time() - start_time) * 1000
|
|
312
|
+
return result.stdout, result.stderr, result.returncode, elapsed_ms
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# Module-level singletons used by run_code(). Constructing the workspace
# manager reads the run_code configuration and environment variables at import
# time; the workspace directory itself is only created on first use.
WORKSPACE_MANAGER = WorkspaceManager()
OPERATION_LOGGER = OperationLogger()
EXECUTION_ENV = CodeExecutionEnvironment(WORKSPACE_MANAGER)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
async def run_code(
|
|
321
|
+
language: str,
|
|
322
|
+
code: str,
|
|
323
|
+
timeout: int = 10,
|
|
324
|
+
assets_dir: str | None = None,
|
|
325
|
+
allowed_imports: list[str] | None = None,
|
|
326
|
+
) -> dict[str, Any]:
|
|
327
|
+
"""
|
|
328
|
+
Execute code in isolated environment, return result structure consistent with previous version.
|
|
329
|
+
"""
|
|
330
|
+
if language.lower() != "python":
|
|
331
|
+
raise ValueError(f"Unsupported language: {language}, currently only Python is supported")
|
|
332
|
+
|
|
333
|
+
WORKSPACE_MANAGER.ensure_initialized()
|
|
334
|
+
ImportGuard.validate(code, allowed_imports)
|
|
335
|
+
|
|
336
|
+
assets_path = WORKSPACE_MANAGER.resolve_assets_dir(assets_dir)
|
|
337
|
+
loop = asyncio.get_running_loop()
|
|
338
|
+
|
|
339
|
+
def _execute():
|
|
340
|
+
return EXECUTION_ENV.run_python(code, timeout, assets_path)
|
|
341
|
+
|
|
342
|
+
try:
|
|
343
|
+
stdout, stderr, exit_code, elapsed_ms = await loop.run_in_executor(None, _execute)
|
|
344
|
+
artifacts, artifact_paths = WORKSPACE_MANAGER.collect_artifacts(assets_path)
|
|
345
|
+
|
|
346
|
+
result = {
|
|
347
|
+
"stdout": stdout,
|
|
348
|
+
"stderr": stderr,
|
|
349
|
+
"artifacts": artifacts,
|
|
350
|
+
"artifact_paths": artifact_paths,
|
|
351
|
+
"exit_code": exit_code,
|
|
352
|
+
"elapsed_ms": elapsed_ms,
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
OPERATION_LOGGER.log(
|
|
356
|
+
"execute_python",
|
|
357
|
+
{
|
|
358
|
+
"status": "success",
|
|
359
|
+
"language": language,
|
|
360
|
+
"timeout": timeout,
|
|
361
|
+
"assets_dir": str(assets_path) if assets_path else None,
|
|
362
|
+
"exit_code": exit_code,
|
|
363
|
+
"elapsed_ms": elapsed_ms,
|
|
364
|
+
"code_size": len(code),
|
|
365
|
+
},
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
return result
|
|
369
|
+
|
|
370
|
+
except subprocess.TimeoutExpired as timeout_exc:
|
|
371
|
+
# Code execution timeout
|
|
372
|
+
artifacts, artifact_paths = WORKSPACE_MANAGER.collect_artifacts(assets_path)
|
|
373
|
+
elapsed_ms = timeout * 1000
|
|
374
|
+
message = f"Code execution timeout ({timeout} seconds)"
|
|
375
|
+
|
|
376
|
+
logger.warning(f"Code execution timeout after {timeout}s: {timeout_exc}")
|
|
377
|
+
|
|
378
|
+
OPERATION_LOGGER.log(
|
|
379
|
+
"execute_python",
|
|
380
|
+
{
|
|
381
|
+
"status": "timeout",
|
|
382
|
+
"language": language,
|
|
383
|
+
"timeout": timeout,
|
|
384
|
+
"assets_dir": str(assets_path) if assets_path else None,
|
|
385
|
+
},
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
return {
|
|
389
|
+
"stdout": "",
|
|
390
|
+
"stderr": message,
|
|
391
|
+
"artifacts": artifacts,
|
|
392
|
+
"artifact_paths": artifact_paths,
|
|
393
|
+
"exit_code": -1,
|
|
394
|
+
"elapsed_ms": elapsed_ms,
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
except Exception as exc: # pylint: disable=broad-except
|
|
398
|
+
# Catch all other exceptions to ensure main flow is not interrupted
|
|
399
|
+
artifacts, artifact_paths = WORKSPACE_MANAGER.collect_artifacts(assets_path)
|
|
400
|
+
elapsed_ms = 0.0
|
|
401
|
+
|
|
402
|
+
if isinstance(exc, CodeExecutionError):
|
|
403
|
+
stderr_message = str(exc)
|
|
404
|
+
else:
|
|
405
|
+
stderr_message = f"Code execution failed: {exc}"
|
|
406
|
+
|
|
407
|
+
# Log detailed error information
|
|
408
|
+
logger.error(f"Code execution error: {exc}", exc_info=True)
|
|
409
|
+
|
|
410
|
+
OPERATION_LOGGER.log(
|
|
411
|
+
"execute_python",
|
|
412
|
+
{
|
|
413
|
+
"status": "error",
|
|
414
|
+
"language": language,
|
|
415
|
+
"timeout": timeout,
|
|
416
|
+
"assets_dir": str(assets_path) if assets_path else None,
|
|
417
|
+
"error": stderr_message,
|
|
418
|
+
},
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
return {
|
|
422
|
+
"stdout": "",
|
|
423
|
+
"stderr": stderr_message,
|
|
424
|
+
"artifacts": artifacts,
|
|
425
|
+
"artifact_paths": artifact_paths,
|
|
426
|
+
"exit_code": -1,
|
|
427
|
+
"elapsed_ms": elapsed_ms,
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def run_code_sync(
|
|
432
|
+
language: str,
|
|
433
|
+
code: str,
|
|
434
|
+
timeout: int = 10,
|
|
435
|
+
assets_dir: str | None = None,
|
|
436
|
+
) -> dict[str, Any]:
|
|
437
|
+
"""
|
|
438
|
+
Synchronous version of code execution (for non-async environments)
|
|
439
|
+
"""
|
|
440
|
+
|
|
441
|
+
return asyncio.run(run_code(language, code, timeout, assets_dir))
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
if __name__ == "__main__":
    import textwrap

    async def _demo():
        """Run six sample snippets through ``run_code`` and print each result."""
        divider = "-" * 40

        def _show(outcome, *, with_artifacts=False):
            # Print the common result fields; artifact fields sit between
            # stderr and exit_code for the cases that report them.
            print("stdout:", outcome["stdout"])
            print("stderr:", outcome["stderr"])
            if with_artifacts:
                print("artifacts:", outcome.get("artifacts", {}))
                print("artifact_paths:", outcome.get("artifact_paths", []))
            print("exit_code:", outcome["exit_code"])

        print("==== 1. Test normal output ====")
        hello_src = "print('Hello from run_code workspace!')"
        _show(await run_code("python", hello_src, timeout=5), with_artifacts=True)
        print(divider)

        print("==== 2. Test exception case ====")
        raising_src = "raise ValueError('Test error from run_code!')"
        _show(await run_code("python", raising_src, timeout=5))
        print(divider)

        print("==== 3. Test code timeout ====")
        sleeping_src = textwrap.dedent(
            """
            import time
            time.sleep(10)
            print("Timeout should occur before this prints.")
            """
        )
        # The snippet sleeps for 10 s while the timeout passed here is 2 s.
        _show(await run_code("python", sleeping_src, timeout=2))
        print(divider)

        print("==== 4. Test plotting functionality (matplotlib) ====")
        plotting_src = textwrap.dedent(
            """
            import matplotlib.pyplot as plt
            plt.figure()
            plt.plot([1, 2, 3], [4, 2, 5])
            plt.title('Simple Plot')
            plt.savefig('test_plot.png')
            print('Plot created!')
            """
        )
        plot_outcome = await run_code("python", plotting_src, timeout=5)
        _show(plot_outcome, with_artifacts=True)
        # Report whether any artifact paths came back for the saved figure.
        if not plot_outcome.get("artifact_paths"):
            print("No image files found.")
        else:
            print("Generated image files:", plot_outcome["artifact_paths"])
        print(divider)

        print("==== 5. Test standard input ====")
        stdin_src = textwrap.dedent(
            """
            text = input("Please enter content: ")
            print("You entered: ", text)
            """
        )
        # run_code is called without any stdin here; this case only shows
        # what the snippet's output looks like in that situation.
        _show(await run_code("python", stdin_src, timeout=5))
        print(divider)

        print("==== 6. Test multi-file and resource read/write ====")
        file_io_src = textwrap.dedent(
            """
            with open('test_file.txt', 'w', encoding='utf-8') as f:
                f.write('Fake data for test!\\nAnother line.')
            with open('test_file.txt', 'r', encoding='utf-8') as f:
                content = f.read()
            print('File content:', content)
            """
        )
        _show(await run_code("python", file_io_src, timeout=5), with_artifacts=True)
        print(divider)

    asyncio.run(_demo())
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Paper Search Tool - ArXiv paper search tool
|
|
3
|
+
|
|
4
|
+
Features:
|
|
5
|
+
1. Search ArXiv papers
|
|
6
|
+
2. Parse paper metadata
|
|
7
|
+
3. Format paper information
|
|
8
|
+
|
|
9
|
+
Author: DeepTutor Team
|
|
10
|
+
Version: v1.0
|
|
11
|
+
Based on: TODO.md specification
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
import re
|
|
17
|
+
|
|
18
|
+
import arxiv
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PaperSearchTool:
|
|
22
|
+
"""ArXiv paper search tool"""
|
|
23
|
+
|
|
24
|
+
def __init__(self):
|
|
25
|
+
"""Initialize search tool"""
|
|
26
|
+
self.client = arxiv.Client()
|
|
27
|
+
|
|
28
|
+
async def search_papers(
|
|
29
|
+
self,
|
|
30
|
+
query: str,
|
|
31
|
+
max_results: int = 3,
|
|
32
|
+
years_limit: int | None = 3,
|
|
33
|
+
sort_by: str = "relevance",
|
|
34
|
+
) -> list[dict]:
|
|
35
|
+
"""
|
|
36
|
+
Search ArXiv papers
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
query: Search query keywords
|
|
40
|
+
max_results: Number of papers to return
|
|
41
|
+
years_limit: Paper year limit (last N years), None means no limit
|
|
42
|
+
sort_by: Sort method - "relevance" or "date"
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
List of papers, each paper contains:
|
|
46
|
+
- title: Title
|
|
47
|
+
- authors: Author list
|
|
48
|
+
- year: Publication year
|
|
49
|
+
- abstract: Abstract
|
|
50
|
+
- url: Paper URL
|
|
51
|
+
- arxiv_id: ArXiv ID
|
|
52
|
+
- published: Publication date (ISO format)
|
|
53
|
+
"""
|
|
54
|
+
# Determine sort method
|
|
55
|
+
if sort_by == "date":
|
|
56
|
+
sort_criterion = arxiv.SortCriterion.SubmittedDate
|
|
57
|
+
else:
|
|
58
|
+
sort_criterion = arxiv.SortCriterion.Relevance
|
|
59
|
+
|
|
60
|
+
# Build search object
|
|
61
|
+
search = arxiv.Search(
|
|
62
|
+
query=query,
|
|
63
|
+
max_results=max_results * 3, # Search more for filtering
|
|
64
|
+
sort_by=sort_criterion,
|
|
65
|
+
sort_order=arxiv.SortOrder.Descending,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
papers = []
|
|
69
|
+
current_year = datetime.now().year
|
|
70
|
+
|
|
71
|
+
# Execute search asynchronously (arxiv library is synchronous, but we can run in executor)
|
|
72
|
+
results = list(self.client.results(search))
|
|
73
|
+
|
|
74
|
+
for result in results:
|
|
75
|
+
# Extract year
|
|
76
|
+
published_date = result.published
|
|
77
|
+
paper_year = published_date.year
|
|
78
|
+
|
|
79
|
+
# Year filtering
|
|
80
|
+
if years_limit and (current_year - paper_year) > years_limit:
|
|
81
|
+
continue
|
|
82
|
+
|
|
83
|
+
# Extract ArXiv ID
|
|
84
|
+
arxiv_id = result.entry_id.split("/")[-1]
|
|
85
|
+
if "v" in arxiv_id:
|
|
86
|
+
arxiv_id = arxiv_id.split("v")[0] # Remove version number
|
|
87
|
+
|
|
88
|
+
# Extract authors
|
|
89
|
+
authors = [author.name for author in result.authors]
|
|
90
|
+
|
|
91
|
+
# Build paper information
|
|
92
|
+
paper_info = {
|
|
93
|
+
"title": result.title,
|
|
94
|
+
"authors": authors,
|
|
95
|
+
"year": paper_year,
|
|
96
|
+
"abstract": result.summary,
|
|
97
|
+
"url": result.entry_id,
|
|
98
|
+
"arxiv_id": arxiv_id,
|
|
99
|
+
"published": published_date.isoformat(),
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
papers.append(paper_info)
|
|
103
|
+
|
|
104
|
+
# If enough collected, stop
|
|
105
|
+
if len(papers) >= max_results:
|
|
106
|
+
break
|
|
107
|
+
|
|
108
|
+
return papers
|
|
109
|
+
|
|
110
|
+
def format_paper_citation(self, paper: dict) -> str:
|
|
111
|
+
"""
|
|
112
|
+
Format paper citation
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
paper: Paper information dictionary
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
Citation string: (FirstAuthor et al., Year)
|
|
119
|
+
"""
|
|
120
|
+
if not paper["authors"]:
|
|
121
|
+
return f"(Unknown, {paper['year']})"
|
|
122
|
+
|
|
123
|
+
first_author = paper["authors"][0].split()[-1] # Extract surname
|
|
124
|
+
|
|
125
|
+
if len(paper["authors"]) > 1:
|
|
126
|
+
return f"({first_author} et al., {paper['year']})"
|
|
127
|
+
return f"({first_author}, {paper['year']})"
|
|
128
|
+
|
|
129
|
+
def extract_arxiv_id_from_url(self, url: str) -> str | None:
|
|
130
|
+
"""
|
|
131
|
+
Extract ArXiv ID from URL
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
url: ArXiv URL
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
ArXiv ID or None
|
|
138
|
+
"""
|
|
139
|
+
match = re.search(r"arxiv\.org/(?:abs|pdf)/(\d+\.\d+)", url)
|
|
140
|
+
if match:
|
|
141
|
+
return match.group(1)
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ========== Usage Example ==========
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
async def main():
    """Run one sample search and print a short summary of every hit."""
    searcher = PaperSearchTool()

    # Test search
    print("Search: transformer attention mechanism")
    found = await searcher.search_papers(
        query="transformer attention mechanism",
        max_results=3,
        years_limit=3,
        sort_by="relevance",
    )

    print(f"\nFound {len(found)} papers:\n")

    for position, entry in enumerate(found, start=1):
        print(f"{position}. {entry['title']}")
        # Only the first three author names are shown.
        leading_authors = ", ".join(entry["authors"][:3])
        print(f" Authors: {leading_authors}")
        print(f" Year: {entry['year']}")
        citation = searcher.format_paper_citation(entry)
        print(f" Citation: {citation}")
        print(f" URL: {entry['url']}")
        print(f" ArXiv ID: {entry['arxiv_id']}")
        print()


if __name__ == "__main__":
    asyncio.run(main())
|