jarvis-ai-assistant 0.1.138__py3-none-any.whl → 0.1.141__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +62 -14
- jarvis/jarvis_agent/builtin_input_handler.py +4 -14
- jarvis/jarvis_agent/main.py +1 -1
- jarvis/jarvis_agent/patch.py +37 -40
- jarvis/jarvis_agent/shell_input_handler.py +2 -3
- jarvis/jarvis_code_agent/code_agent.py +23 -30
- jarvis/jarvis_code_analysis/checklists/__init__.py +3 -0
- jarvis/jarvis_code_analysis/checklists/c_cpp.py +50 -0
- jarvis/jarvis_code_analysis/checklists/csharp.py +75 -0
- jarvis/jarvis_code_analysis/checklists/data_format.py +82 -0
- jarvis/jarvis_code_analysis/checklists/devops.py +107 -0
- jarvis/jarvis_code_analysis/checklists/docs.py +87 -0
- jarvis/jarvis_code_analysis/checklists/go.py +52 -0
- jarvis/jarvis_code_analysis/checklists/infrastructure.py +98 -0
- jarvis/jarvis_code_analysis/checklists/java.py +66 -0
- jarvis/jarvis_code_analysis/checklists/javascript.py +73 -0
- jarvis/jarvis_code_analysis/checklists/kotlin.py +107 -0
- jarvis/jarvis_code_analysis/checklists/loader.py +76 -0
- jarvis/jarvis_code_analysis/checklists/php.py +77 -0
- jarvis/jarvis_code_analysis/checklists/python.py +56 -0
- jarvis/jarvis_code_analysis/checklists/ruby.py +107 -0
- jarvis/jarvis_code_analysis/checklists/rust.py +58 -0
- jarvis/jarvis_code_analysis/checklists/shell.py +75 -0
- jarvis/jarvis_code_analysis/checklists/sql.py +72 -0
- jarvis/jarvis_code_analysis/checklists/swift.py +77 -0
- jarvis/jarvis_code_analysis/checklists/web.py +97 -0
- jarvis/jarvis_code_analysis/code_review.py +660 -0
- jarvis/jarvis_dev/main.py +61 -88
- jarvis/jarvis_git_squash/main.py +3 -3
- jarvis/jarvis_git_utils/git_commiter.py +242 -0
- jarvis/jarvis_init/main.py +62 -0
- jarvis/jarvis_platform/base.py +4 -0
- jarvis/jarvis_platform/kimi.py +173 -5
- jarvis/jarvis_platform/openai.py +3 -0
- jarvis/jarvis_platform/registry.py +1 -0
- jarvis/jarvis_platform/yuanbao.py +275 -5
- jarvis/jarvis_tools/ask_codebase.py +6 -9
- jarvis/jarvis_tools/ask_user.py +17 -5
- jarvis/jarvis_tools/base.py +3 -1
- jarvis/jarvis_tools/chdir.py +1 -0
- jarvis/jarvis_tools/create_code_agent.py +4 -3
- jarvis/jarvis_tools/create_sub_agent.py +1 -0
- jarvis/jarvis_tools/execute_script.py +170 -0
- jarvis/jarvis_tools/file_analyzer.py +90 -239
- jarvis/jarvis_tools/file_operation.py +99 -31
- jarvis/jarvis_tools/{find_methodolopy.py → find_methodology.py} +2 -1
- jarvis/jarvis_tools/lsp_get_diagnostics.py +2 -0
- jarvis/jarvis_tools/methodology.py +11 -11
- jarvis/jarvis_tools/read_code.py +2 -0
- jarvis/jarvis_tools/read_webpage.py +33 -196
- jarvis/jarvis_tools/registry.py +68 -131
- jarvis/jarvis_tools/search_web.py +14 -6
- jarvis/jarvis_tools/virtual_tty.py +399 -0
- jarvis/jarvis_utils/config.py +29 -3
- jarvis/jarvis_utils/embedding.py +0 -317
- jarvis/jarvis_utils/file_processors.py +343 -0
- jarvis/jarvis_utils/input.py +0 -1
- jarvis/jarvis_utils/methodology.py +94 -435
- jarvis/jarvis_utils/utils.py +207 -9
- {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/METADATA +4 -4
- jarvis_ai_assistant-0.1.141.dist-info/RECORD +94 -0
- {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/entry_points.txt +4 -4
- jarvis/jarvis_code_agent/file_select.py +0 -202
- jarvis/jarvis_platform/ai8.py +0 -268
- jarvis/jarvis_platform/ollama.py +0 -137
- jarvis/jarvis_platform/oyi.py +0 -307
- jarvis/jarvis_rag/file_processors.py +0 -138
- jarvis/jarvis_rag/main.py +0 -1734
- jarvis/jarvis_tools/code_review.py +0 -333
- jarvis/jarvis_tools/execute_python_script.py +0 -58
- jarvis/jarvis_tools/execute_shell.py +0 -97
- jarvis/jarvis_tools/execute_shell_script.py +0 -58
- jarvis/jarvis_tools/find_caller.py +0 -278
- jarvis/jarvis_tools/find_symbol.py +0 -295
- jarvis/jarvis_tools/function_analyzer.py +0 -331
- jarvis/jarvis_tools/git_commiter.py +0 -167
- jarvis/jarvis_tools/project_analyzer.py +0 -304
- jarvis/jarvis_tools/rag.py +0 -143
- jarvis/jarvis_tools/tool_generator.py +0 -221
- jarvis_ai_assistant-0.1.138.dist-info/RECORD +0 -85
- /jarvis/{jarvis_rag → jarvis_init}/__init__.py +0 -0
- {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,23 @@
|
|
|
1
1
|
from typing import Dict, Any
|
|
2
2
|
import os
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
from yaspin import yaspin
|
|
5
6
|
|
|
6
7
|
from jarvis.jarvis_utils.globals import add_read_file_record
|
|
7
8
|
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
9
|
+
# 导入文件处理器
|
|
10
|
+
from jarvis.jarvis_utils.file_processors import (
|
|
11
|
+
TextFileProcessor, PDFProcessor, DocxProcessor,
|
|
12
|
+
PPTProcessor, ExcelProcessor
|
|
13
|
+
)
|
|
8
14
|
|
|
9
15
|
|
|
10
16
|
|
|
11
17
|
class FileOperationTool:
|
|
12
18
|
name = "file_operation"
|
|
13
|
-
description = "
|
|
19
|
+
description = "文件批量操作工具,可批量读写多个文件,支持文本、PDF、Word、Excel、PPT等格式,适用于需要同时处理多个文件的场景(读取配置文件、保存生成内容等)"
|
|
20
|
+
labels = ['file', 'io', 'batch']
|
|
14
21
|
parameters = {
|
|
15
22
|
"type": "object",
|
|
16
23
|
"properties": {
|
|
@@ -35,12 +42,29 @@ class FileOperationTool:
|
|
|
35
42
|
"required": ["operation", "files"]
|
|
36
43
|
}
|
|
37
44
|
|
|
45
|
+
def _get_file_processor(self, file_path: str):
|
|
46
|
+
"""获取适合处理指定文件的处理器"""
|
|
47
|
+
processors = [
|
|
48
|
+
PDFProcessor, # PDF文件处理器
|
|
49
|
+
DocxProcessor, # Word文档处理器
|
|
50
|
+
PPTProcessor, # PowerPoint演示文稿处理器
|
|
51
|
+
ExcelProcessor, # Excel表格处理器
|
|
52
|
+
TextFileProcessor # 文本文件处理器(放在最后作为兜底)
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
for processor in processors:
|
|
56
|
+
if processor.can_handle(file_path):
|
|
57
|
+
return processor
|
|
58
|
+
|
|
59
|
+
return None # 如果没有合适的处理器,返回None
|
|
60
|
+
|
|
38
61
|
def _handle_single_file(self, operation: str, filepath: str, content: str = "",
|
|
39
62
|
start_line: int = 1, end_line: int = -1) -> Dict[str, Any]:
|
|
40
63
|
"""Handle operations for a single file"""
|
|
41
64
|
try:
|
|
42
65
|
abs_path = os.path.abspath(filepath)
|
|
43
66
|
add_read_file_record(abs_path)
|
|
67
|
+
|
|
44
68
|
if operation == "read":
|
|
45
69
|
with yaspin(text=f"正在读取文件: {abs_path}...", color="cyan") as spinner:
|
|
46
70
|
if not os.path.exists(abs_path):
|
|
@@ -50,38 +74,82 @@ class FileOperationTool:
|
|
|
50
74
|
"stderr": f"文件不存在: {abs_path}"
|
|
51
75
|
}
|
|
52
76
|
|
|
53
|
-
|
|
77
|
+
# 检查文件大小
|
|
78
|
+
if os.path.getsize(abs_path) > 30 * 1024 * 1024: # 30MB
|
|
54
79
|
return {
|
|
55
80
|
"success": False,
|
|
56
81
|
"stdout": "",
|
|
57
|
-
"stderr": "
|
|
82
|
+
"stderr": "文件过大 (>30MB),无法处理"
|
|
58
83
|
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
end_line = end_line if end_line >= 0 else total_lines + end_line + 1
|
|
67
|
-
start_line = max(1, min(start_line, total_lines))
|
|
68
|
-
end_line = max(1, min(end_line, total_lines))
|
|
69
|
-
if end_line == -1:
|
|
70
|
-
end_line = total_lines
|
|
71
|
-
|
|
72
|
-
if start_line > end_line:
|
|
73
|
-
spinner.text = "无效的行范围"
|
|
74
|
-
spinner.fail("❌")
|
|
75
|
-
error_msg = f"无效的行范围 [{start_line, end_line}] (文件总行数: {total_lines})"
|
|
84
|
+
|
|
85
|
+
file_extension = Path(abs_path).suffix.lower()
|
|
86
|
+
|
|
87
|
+
# 获取文件处理器
|
|
88
|
+
processor = self._get_file_processor(abs_path)
|
|
89
|
+
|
|
90
|
+
if processor is None:
|
|
76
91
|
return {
|
|
77
92
|
"success": False,
|
|
78
93
|
"stdout": "",
|
|
79
|
-
"stderr":
|
|
94
|
+
"stderr": f"不支持的文件类型: {file_extension}"
|
|
80
95
|
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
96
|
+
|
|
97
|
+
# 特殊处理纯文本文件,支持行范围选择
|
|
98
|
+
if processor == TextFileProcessor:
|
|
99
|
+
try:
|
|
100
|
+
with open(abs_path, 'r', encoding='utf-8', errors="ignore") as f:
|
|
101
|
+
lines = f.readlines()
|
|
102
|
+
|
|
103
|
+
total_lines = len(lines)
|
|
104
|
+
start_line = start_line if start_line >= 0 else total_lines + start_line + 1
|
|
105
|
+
end_line = end_line if end_line >= 0 else total_lines + end_line + 1
|
|
106
|
+
start_line = max(1, min(start_line, total_lines))
|
|
107
|
+
end_line = max(1, min(end_line, total_lines))
|
|
108
|
+
if end_line == -1:
|
|
109
|
+
end_line = total_lines
|
|
110
|
+
|
|
111
|
+
if start_line > end_line:
|
|
112
|
+
spinner.text = "无效的行范围"
|
|
113
|
+
spinner.fail("❌")
|
|
114
|
+
error_msg = f"无效的行范围 [{start_line, end_line}] (文件总行数: {total_lines})"
|
|
115
|
+
return {
|
|
116
|
+
"success": False,
|
|
117
|
+
"stdout": "",
|
|
118
|
+
"stderr": error_msg
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
content = "".join(lines[start_line - 1:end_line])
|
|
122
|
+
file_info = f"\n文件: {abs_path} (文本文件)\n行: [{start_line}-{end_line}]/{total_lines}"
|
|
123
|
+
except Exception as e:
|
|
124
|
+
return {
|
|
125
|
+
"success": False,
|
|
126
|
+
"stdout": "",
|
|
127
|
+
"stderr": f"读取文本文件失败: {str(e)}"
|
|
128
|
+
}
|
|
129
|
+
else:
|
|
130
|
+
# 使用专用处理器来提取非文本文件的内容
|
|
131
|
+
try:
|
|
132
|
+
spinner.text = f"使用 {processor.__name__} 提取 {abs_path} 的内容..."
|
|
133
|
+
content = processor.extract_text(abs_path)
|
|
134
|
+
# 获取文件类型友好名称
|
|
135
|
+
file_type_names = {
|
|
136
|
+
PDFProcessor: "PDF文档",
|
|
137
|
+
DocxProcessor: "Word文档",
|
|
138
|
+
PPTProcessor: "PowerPoint演示文稿",
|
|
139
|
+
ExcelProcessor: "Excel表格"
|
|
140
|
+
}
|
|
141
|
+
file_type = file_type_names.get(processor, file_extension)
|
|
142
|
+
file_info = f"\n文件: {abs_path} ({file_type})"
|
|
143
|
+
except Exception as e:
|
|
144
|
+
return {
|
|
145
|
+
"success": False,
|
|
146
|
+
"stdout": "",
|
|
147
|
+
"stderr": f"提取 {file_extension} 文件内容失败: {str(e)}"
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
# 构建输出信息
|
|
151
|
+
output = f"{file_info}\n{content}" + "\n\n"
|
|
152
|
+
|
|
85
153
|
spinner.text = f"文件读取完成: {abs_path}"
|
|
86
154
|
spinner.ok("✅")
|
|
87
155
|
return {
|
|
@@ -98,13 +166,13 @@ class FileOperationTool:
|
|
|
98
166
|
spinner.ok("✅")
|
|
99
167
|
return {
|
|
100
168
|
"success": True,
|
|
101
|
-
"stdout": f"
|
|
169
|
+
"stdout": f"文件写入成功: {abs_path}",
|
|
102
170
|
"stderr": ""
|
|
103
171
|
}
|
|
104
172
|
return {
|
|
105
173
|
"success": False,
|
|
106
174
|
"stdout": "",
|
|
107
|
-
"stderr": f"
|
|
175
|
+
"stderr": f"未知操作: {operation}"
|
|
108
176
|
}
|
|
109
177
|
|
|
110
178
|
except Exception as e:
|
|
@@ -112,7 +180,7 @@ class FileOperationTool:
|
|
|
112
180
|
return {
|
|
113
181
|
"success": False,
|
|
114
182
|
"stdout": "",
|
|
115
|
-
"stderr": f"
|
|
183
|
+
"stderr": f"文件操作失败 {abs_path}: {str(e)}"
|
|
116
184
|
}
|
|
117
185
|
|
|
118
186
|
def execute(self, args: Dict) -> Dict[str, Any]:
|
|
@@ -134,7 +202,7 @@ class FileOperationTool:
|
|
|
134
202
|
return {
|
|
135
203
|
"success": False,
|
|
136
204
|
"stdout": "",
|
|
137
|
-
"stderr": "files
|
|
205
|
+
"stderr": "files参数是必需的,且必须是一个列表"
|
|
138
206
|
}
|
|
139
207
|
|
|
140
208
|
all_outputs = []
|
|
@@ -156,7 +224,7 @@ class FileOperationTool:
|
|
|
156
224
|
if result["success"]:
|
|
157
225
|
all_outputs.append(result["stdout"])
|
|
158
226
|
else:
|
|
159
|
-
all_outputs.append(f"
|
|
227
|
+
all_outputs.append(f"处理文件 {file_info['path']} 时出错: {result['stderr']}")
|
|
160
228
|
success = success and result["success"]
|
|
161
229
|
|
|
162
230
|
# Combine all outputs with separators
|
|
@@ -173,5 +241,5 @@ class FileOperationTool:
|
|
|
173
241
|
return {
|
|
174
242
|
"success": False,
|
|
175
243
|
"stdout": "",
|
|
176
|
-
"stderr": f"
|
|
244
|
+
"stderr": f"文件操作失败: {str(e)}"
|
|
177
245
|
}
|
|
@@ -7,6 +7,7 @@ from jarvis.jarvis_utils.methodology import load_methodology
|
|
|
7
7
|
class FindMethodologyTool:
|
|
8
8
|
name = "find_methodology"
|
|
9
9
|
description = "方法论查找工具,用于在执行过程中查看历史方法论辅助决策"
|
|
10
|
+
labels = ['methodology', 'search', 'analysis']
|
|
10
11
|
parameters = {
|
|
11
12
|
"type": "object",
|
|
12
13
|
"properties": {
|
|
@@ -18,7 +19,7 @@ class FindMethodologyTool:
|
|
|
18
19
|
"required": ["query"]
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
def execute(self, args: Dict) -> Dict[str, Any]:
|
|
22
|
+
def execute(self, args: Dict[str, Any]) -> Dict[str, Any]:
|
|
22
23
|
"""执行方法论查找操作
|
|
23
24
|
|
|
24
25
|
Args:
|
|
@@ -8,6 +8,8 @@ class LSPGetDiagnosticsTool:
|
|
|
8
8
|
name = "lsp_get_diagnostics"
|
|
9
9
|
# 工具描述
|
|
10
10
|
description = "Get diagnostic information (errors, warnings) from code files"
|
|
11
|
+
# 工具标签
|
|
12
|
+
labels = ['code', 'analysis', 'lsp']
|
|
11
13
|
# 工具参数定义
|
|
12
14
|
parameters = {
|
|
13
15
|
"file_path": "Path to the file to analyze",
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
-
import glob
|
|
4
3
|
import hashlib
|
|
5
|
-
from typing import Dict,
|
|
4
|
+
from typing import Dict, Any
|
|
6
5
|
|
|
7
6
|
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
8
7
|
|
|
@@ -13,6 +12,7 @@ class MethodologyTool:
|
|
|
13
12
|
|
|
14
13
|
name = "methodology"
|
|
15
14
|
description = "管理问题解决方法论,支持添加、更新和删除操作"
|
|
15
|
+
labels = ['analysis', 'planning']
|
|
16
16
|
parameters = {
|
|
17
17
|
"type": "object",
|
|
18
18
|
"properties": {
|
|
@@ -23,7 +23,7 @@ class MethodologyTool:
|
|
|
23
23
|
},
|
|
24
24
|
"problem_type": {
|
|
25
25
|
"type": "string",
|
|
26
|
-
"description": "
|
|
26
|
+
"description": "问题类型,例如:部署开源项目、生成提交信息"
|
|
27
27
|
},
|
|
28
28
|
"content": {
|
|
29
29
|
"type": "string",
|
|
@@ -82,7 +82,7 @@ class MethodologyTool:
|
|
|
82
82
|
return {
|
|
83
83
|
"success": False,
|
|
84
84
|
"stdout": "",
|
|
85
|
-
"stderr": "
|
|
85
|
+
"stderr": "缺少必要参数: operation和problem_type"
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
try:
|
|
@@ -95,14 +95,14 @@ class MethodologyTool:
|
|
|
95
95
|
os.remove(file_path)
|
|
96
96
|
return {
|
|
97
97
|
"success": True,
|
|
98
|
-
"stdout": f"
|
|
98
|
+
"stdout": f"已删除问题类型'{problem_type}'对应的方法论",
|
|
99
99
|
"stderr": ""
|
|
100
100
|
}
|
|
101
101
|
else:
|
|
102
102
|
return {
|
|
103
103
|
"success": False,
|
|
104
104
|
"stdout": "",
|
|
105
|
-
"stderr": f"
|
|
105
|
+
"stderr": f"未找到问题类型'{problem_type}'对应的方法论"
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
elif operation in ["update", "add"]:
|
|
@@ -110,7 +110,7 @@ class MethodologyTool:
|
|
|
110
110
|
return {
|
|
111
111
|
"success": False,
|
|
112
112
|
"stdout": "",
|
|
113
|
-
"stderr": "
|
|
113
|
+
"stderr": "需要提供方法论内容"
|
|
114
114
|
}
|
|
115
115
|
|
|
116
116
|
# 确保目录存在
|
|
@@ -128,10 +128,10 @@ class MethodologyTool:
|
|
|
128
128
|
|
|
129
129
|
PrettyOutput.print(f"方法论已保存到 {file_path}", OutputType.INFO)
|
|
130
130
|
|
|
131
|
-
action = "
|
|
131
|
+
action = "更新" if os.path.exists(file_path) else "添加"
|
|
132
132
|
return {
|
|
133
133
|
"success": True,
|
|
134
|
-
"stdout": f"{action}
|
|
134
|
+
"stdout": f"{action}了问题类型'{problem_type}'对应的方法论",
|
|
135
135
|
"stderr": ""
|
|
136
136
|
}
|
|
137
137
|
|
|
@@ -139,13 +139,13 @@ class MethodologyTool:
|
|
|
139
139
|
return {
|
|
140
140
|
"success": False,
|
|
141
141
|
"stdout": "",
|
|
142
|
-
"stderr": f"
|
|
142
|
+
"stderr": f"不支持的操作类型: {operation}"
|
|
143
143
|
}
|
|
144
144
|
|
|
145
145
|
except Exception as e:
|
|
146
146
|
return {
|
|
147
147
|
"success": False,
|
|
148
148
|
"stdout": "",
|
|
149
|
-
"stderr": f"
|
|
149
|
+
"stderr": f"执行失败: {str(e)}"
|
|
150
150
|
}
|
|
151
151
|
|
jarvis/jarvis_tools/read_code.py
CHANGED
|
@@ -10,6 +10,8 @@ from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
|
10
10
|
class ReadCodeTool:
|
|
11
11
|
name = "read_code"
|
|
12
12
|
description = "代码阅读与分析工具,用于读取源代码文件并添加行号,针对代码文件优化,提供更好的格式化输出和行号显示,适用于代码分析、审查和理解代码实现的场景"
|
|
13
|
+
# 工具标签
|
|
14
|
+
labels = ['code', 'analysis', 'file']
|
|
13
15
|
parameters = {
|
|
14
16
|
"type": "object",
|
|
15
17
|
"properties": {
|
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
from typing import Dict, Any
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
from urllib.parse import urlparse, urljoin
|
|
5
|
-
import re
|
|
6
|
-
|
|
2
|
+
import os
|
|
3
|
+
from jarvis.jarvis_platform.registry import PlatformRegistry
|
|
7
4
|
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
8
5
|
|
|
9
6
|
class WebpageTool:
|
|
10
7
|
name = "read_webpage"
|
|
11
8
|
description = "读取网页内容,提取标题、文本和超链接"
|
|
9
|
+
labels = ['web', 'scraping']
|
|
12
10
|
parameters = {
|
|
13
11
|
"type": "object",
|
|
14
12
|
"properties": {
|
|
@@ -20,79 +18,41 @@ class WebpageTool:
|
|
|
20
18
|
"required": ["url"]
|
|
21
19
|
}
|
|
22
20
|
|
|
23
|
-
def
|
|
24
|
-
"""
|
|
21
|
+
def __init__(self):
|
|
22
|
+
if os.getenv("YUANBAO_COOKIES", "") != "" and os.getenv("YUANBAO_AGENT_ID", "") != "":
|
|
23
|
+
self.platform = "yuanbao"
|
|
24
|
+
self.model = "deep_seek"
|
|
25
|
+
elif os.getenv("KIMI_API_KEY", "") != "":
|
|
26
|
+
self.platform = "kimi"
|
|
27
|
+
self.model = "k1"
|
|
28
|
+
else:
|
|
29
|
+
self.platform = ""
|
|
30
|
+
|
|
31
|
+
@staticmethod
|
|
32
|
+
def check() -> bool:
|
|
33
|
+
return os.getenv("YUANBAO_COOKIES", "") != "" and os.getenv("YUANBAO_AGENT_ID", "") != "" or os.getenv("KIMI_API_KEY", "") != ""
|
|
34
|
+
|
|
35
|
+
def execute(self, args: Dict[str, Any]) -> Dict[str, Any]:
|
|
36
|
+
"""Read webpage content using Yuanbao model"""
|
|
25
37
|
try:
|
|
26
38
|
url = args["url"].strip()
|
|
39
|
+
|
|
40
|
+
# Create Yuanbao model instance
|
|
41
|
+
model = PlatformRegistry().create_platform(self.platform)
|
|
42
|
+
model.set_suppress_output(False) # type: ignore
|
|
43
|
+
model.set_model_name(self.model) # type: ignore
|
|
27
44
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
browser = p.chromium.launch(
|
|
31
|
-
headless=True,
|
|
32
|
-
args=['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage']
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
# Create a new page with appropriate settings
|
|
36
|
-
page = browser.new_page(
|
|
37
|
-
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
|
|
38
|
-
viewport={'width': 1920, 'height': 1080}
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
# Set timeout to avoid long waits
|
|
42
|
-
page.set_default_timeout(30000) # 30 seconds
|
|
43
|
-
|
|
44
|
-
try:
|
|
45
|
-
# Navigate to URL and wait for page to load
|
|
46
|
-
response = page.goto(url, wait_until="domcontentloaded")
|
|
47
|
-
|
|
48
|
-
# Additional wait for network to be idle (with a timeout)
|
|
49
|
-
try:
|
|
50
|
-
page.wait_for_load_state("networkidle", timeout=10000)
|
|
51
|
-
except PlaywrightTimeoutError:
|
|
52
|
-
# Continue even if network doesn't become completely idle
|
|
53
|
-
pass
|
|
54
|
-
|
|
55
|
-
# Make sure we got a valid response
|
|
56
|
-
if not response or response.status >= 400:
|
|
57
|
-
raise Exception(f"Failed to load page: HTTP {response.status if response else 'No response'}")
|
|
58
|
-
|
|
59
|
-
# Get page title safely
|
|
60
|
-
title = "No title"
|
|
61
|
-
try:
|
|
62
|
-
title = page.title()
|
|
63
|
-
except Exception:
|
|
64
|
-
# Try to extract title from content if direct method fails
|
|
65
|
-
try:
|
|
66
|
-
title_element = page.query_selector("title")
|
|
67
|
-
if title_element:
|
|
68
|
-
title = title_element.text_content() or "No title"
|
|
69
|
-
except Exception:
|
|
70
|
-
pass
|
|
71
|
-
|
|
72
|
-
# Get the HTML content after JavaScript execution
|
|
73
|
-
html_content = page.content()
|
|
45
|
+
# Construct prompt for webpage reading
|
|
46
|
+
prompt = f"请帮我读取并总结这个网页的内容:{url}\n请以markdown格式输出,包含标题和主要内容。"
|
|
74
47
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
finally:
|
|
78
|
-
# Always close browser
|
|
79
|
-
browser.close()
|
|
48
|
+
# Get response from Yuanbao model
|
|
49
|
+
response = model.chat_until_success(prompt) # type: ignore
|
|
80
50
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
f"# {title}",
|
|
87
|
-
f"Url: {url}",
|
|
88
|
-
markdown_content
|
|
89
|
-
]
|
|
90
|
-
|
|
91
|
-
return {
|
|
92
|
-
"success": True,
|
|
93
|
-
"stdout": "\n".join(output),
|
|
94
|
-
"stderr": ""
|
|
95
|
-
}
|
|
51
|
+
return {
|
|
52
|
+
"success": True,
|
|
53
|
+
"stdout": response,
|
|
54
|
+
"stderr": ""
|
|
55
|
+
}
|
|
96
56
|
|
|
97
57
|
except Exception as e:
|
|
98
58
|
PrettyOutput.print(f"读取网页失败: {str(e)}", OutputType.ERROR)
|
|
@@ -101,126 +61,3 @@ class WebpageTool:
|
|
|
101
61
|
"stdout": "",
|
|
102
62
|
"stderr": f"Failed to parse webpage: {str(e)}"
|
|
103
63
|
}
|
|
104
|
-
|
|
105
|
-
def _create_soup_element(self, content):
|
|
106
|
-
"""Safely create a BeautifulSoup element, ensuring it's treated as markup"""
|
|
107
|
-
if isinstance(content, str):
|
|
108
|
-
# Create a wrapper tag to ensure proper parsing
|
|
109
|
-
soup_div = BeautifulSoup(f"<div>{content}</div>", 'html.parser').div
|
|
110
|
-
if soup_div is not None:
|
|
111
|
-
return soup_div.contents
|
|
112
|
-
# Return an empty list if the div is None
|
|
113
|
-
return []
|
|
114
|
-
return content
|
|
115
|
-
|
|
116
|
-
def _html_to_markdown(self, html_content: str, base_url: str) -> str:
|
|
117
|
-
"""Convert HTML to Markdown format preserving the content structure"""
|
|
118
|
-
soup = BeautifulSoup(html_content, 'html.parser')
|
|
119
|
-
|
|
120
|
-
# Remove unwanted elements
|
|
121
|
-
for element in soup(['script', 'style', 'meta', 'noscript', 'head']):
|
|
122
|
-
element.decompose()
|
|
123
|
-
|
|
124
|
-
# Process headings
|
|
125
|
-
for level in range(1, 7):
|
|
126
|
-
for heading in soup.find_all(f'h{level}'):
|
|
127
|
-
text = heading.get_text().strip()
|
|
128
|
-
heading_md = "\n\n" + "#" * level + " " + text + "\n\n"
|
|
129
|
-
new_element = self._create_soup_element(heading_md)
|
|
130
|
-
heading.replace_with(*new_element)
|
|
131
|
-
|
|
132
|
-
# Process paragraphs
|
|
133
|
-
for p in soup.find_all('p'):
|
|
134
|
-
text = p.get_text().strip()
|
|
135
|
-
if text:
|
|
136
|
-
new_element = self._create_soup_element("\n\n" + text + "\n\n")
|
|
137
|
-
p.replace_with(*new_element)
|
|
138
|
-
|
|
139
|
-
# Process unordered lists
|
|
140
|
-
for ul in soup.find_all('ul'):
|
|
141
|
-
items = []
|
|
142
|
-
for li in ul.find_all('li', recursive=False):
|
|
143
|
-
items.append("* " + li.get_text().strip())
|
|
144
|
-
new_element = self._create_soup_element("\n\n" + "\n".join(items) + "\n\n")
|
|
145
|
-
ul.replace_with(*new_element)
|
|
146
|
-
|
|
147
|
-
# Process ordered lists
|
|
148
|
-
for ol in soup.find_all('ol'):
|
|
149
|
-
items = []
|
|
150
|
-
for i, li in enumerate(ol.find_all('li', recursive=False), 1):
|
|
151
|
-
items.append(str(i) + ". " + li.get_text().strip())
|
|
152
|
-
new_element = self._create_soup_element("\n\n" + "\n".join(items) + "\n\n")
|
|
153
|
-
ol.replace_with(*new_element)
|
|
154
|
-
|
|
155
|
-
# Process links (first pass)
|
|
156
|
-
for a in soup.find_all('a', href=True):
|
|
157
|
-
try:
|
|
158
|
-
href = a['href']
|
|
159
|
-
text = a.get_text().strip()
|
|
160
|
-
if text and href:
|
|
161
|
-
# Convert relative URLs to absolute
|
|
162
|
-
if href.startswith('/') and not href.startswith('//'):
|
|
163
|
-
href = urljoin(base_url, href)
|
|
164
|
-
link_md = "[" + text + "](" + href + ")"
|
|
165
|
-
new_element = self._create_soup_element(link_md)
|
|
166
|
-
a.replace_with(*new_element)
|
|
167
|
-
except (KeyError, AttributeError):
|
|
168
|
-
continue
|
|
169
|
-
|
|
170
|
-
# Process images
|
|
171
|
-
for img in soup.find_all('img', src=True):
|
|
172
|
-
try:
|
|
173
|
-
src = img['src']
|
|
174
|
-
alt = img.get('alt', 'Image').strip()
|
|
175
|
-
# Convert relative URLs to absolute
|
|
176
|
-
if src.startswith('/') and not src.startswith('//'):
|
|
177
|
-
src = urljoin(base_url, src)
|
|
178
|
-
img_md = ""
|
|
179
|
-
new_element = self._create_soup_element(img_md)
|
|
180
|
-
img.replace_with(*new_element)
|
|
181
|
-
except (KeyError, AttributeError, UnboundLocalError):
|
|
182
|
-
continue
|
|
183
|
-
|
|
184
|
-
# Process code blocks
|
|
185
|
-
for pre in soup.find_all('pre'):
|
|
186
|
-
code = pre.get_text().strip()
|
|
187
|
-
pre_md = "\n\n```\n" + code + "\n```\n\n"
|
|
188
|
-
new_element = self._create_soup_element(pre_md)
|
|
189
|
-
pre.replace_with(*new_element)
|
|
190
|
-
|
|
191
|
-
# Process inline code
|
|
192
|
-
for code in soup.find_all('code'):
|
|
193
|
-
text = code.get_text().strip()
|
|
194
|
-
code_md = "`" + text + "`"
|
|
195
|
-
new_element = self._create_soup_element(code_md)
|
|
196
|
-
code.replace_with(*new_element)
|
|
197
|
-
|
|
198
|
-
# Process line breaks
|
|
199
|
-
for br in soup.find_all('br'):
|
|
200
|
-
new_element = self._create_soup_element('\n')
|
|
201
|
-
br.replace_with(*new_element)
|
|
202
|
-
|
|
203
|
-
# Get the full text
|
|
204
|
-
markdown_text = soup.get_text()
|
|
205
|
-
|
|
206
|
-
# Clean up extra whitespace and line breaks
|
|
207
|
-
markdown_text = re.sub(r'\n{3,}', '\n\n', markdown_text)
|
|
208
|
-
markdown_text = re.sub(r'\s{2,}', ' ', markdown_text)
|
|
209
|
-
|
|
210
|
-
# Process links again (for any that might have been missed)
|
|
211
|
-
link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
|
212
|
-
all_links = re.findall(link_pattern, markdown_text)
|
|
213
|
-
|
|
214
|
-
# Add a section with all links at the end
|
|
215
|
-
if all_links:
|
|
216
|
-
link_section = ["", "## Links", ""]
|
|
217
|
-
seen_links = set()
|
|
218
|
-
for text, href in all_links:
|
|
219
|
-
link_entry = "[" + text + "](" + href + ")"
|
|
220
|
-
if link_entry not in seen_links:
|
|
221
|
-
link_section.append(link_entry)
|
|
222
|
-
seen_links.add(link_entry)
|
|
223
|
-
|
|
224
|
-
markdown_text += "\n\n" + "\n".join(link_section)
|
|
225
|
-
|
|
226
|
-
return markdown_text.strip()
|