jarvis-ai-assistant: jarvis_ai_assistant-0.1.138-py3-none-any.whl → jarvis_ai_assistant-0.1.141-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jarvis-ai-assistant might be problematic. Click here for more details.

Files changed (85)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +62 -14
  3. jarvis/jarvis_agent/builtin_input_handler.py +4 -14
  4. jarvis/jarvis_agent/main.py +1 -1
  5. jarvis/jarvis_agent/patch.py +37 -40
  6. jarvis/jarvis_agent/shell_input_handler.py +2 -3
  7. jarvis/jarvis_code_agent/code_agent.py +23 -30
  8. jarvis/jarvis_code_analysis/checklists/__init__.py +3 -0
  9. jarvis/jarvis_code_analysis/checklists/c_cpp.py +50 -0
  10. jarvis/jarvis_code_analysis/checklists/csharp.py +75 -0
  11. jarvis/jarvis_code_analysis/checklists/data_format.py +82 -0
  12. jarvis/jarvis_code_analysis/checklists/devops.py +107 -0
  13. jarvis/jarvis_code_analysis/checklists/docs.py +87 -0
  14. jarvis/jarvis_code_analysis/checklists/go.py +52 -0
  15. jarvis/jarvis_code_analysis/checklists/infrastructure.py +98 -0
  16. jarvis/jarvis_code_analysis/checklists/java.py +66 -0
  17. jarvis/jarvis_code_analysis/checklists/javascript.py +73 -0
  18. jarvis/jarvis_code_analysis/checklists/kotlin.py +107 -0
  19. jarvis/jarvis_code_analysis/checklists/loader.py +76 -0
  20. jarvis/jarvis_code_analysis/checklists/php.py +77 -0
  21. jarvis/jarvis_code_analysis/checklists/python.py +56 -0
  22. jarvis/jarvis_code_analysis/checklists/ruby.py +107 -0
  23. jarvis/jarvis_code_analysis/checklists/rust.py +58 -0
  24. jarvis/jarvis_code_analysis/checklists/shell.py +75 -0
  25. jarvis/jarvis_code_analysis/checklists/sql.py +72 -0
  26. jarvis/jarvis_code_analysis/checklists/swift.py +77 -0
  27. jarvis/jarvis_code_analysis/checklists/web.py +97 -0
  28. jarvis/jarvis_code_analysis/code_review.py +660 -0
  29. jarvis/jarvis_dev/main.py +61 -88
  30. jarvis/jarvis_git_squash/main.py +3 -3
  31. jarvis/jarvis_git_utils/git_commiter.py +242 -0
  32. jarvis/jarvis_init/main.py +62 -0
  33. jarvis/jarvis_platform/base.py +4 -0
  34. jarvis/jarvis_platform/kimi.py +173 -5
  35. jarvis/jarvis_platform/openai.py +3 -0
  36. jarvis/jarvis_platform/registry.py +1 -0
  37. jarvis/jarvis_platform/yuanbao.py +275 -5
  38. jarvis/jarvis_tools/ask_codebase.py +6 -9
  39. jarvis/jarvis_tools/ask_user.py +17 -5
  40. jarvis/jarvis_tools/base.py +3 -1
  41. jarvis/jarvis_tools/chdir.py +1 -0
  42. jarvis/jarvis_tools/create_code_agent.py +4 -3
  43. jarvis/jarvis_tools/create_sub_agent.py +1 -0
  44. jarvis/jarvis_tools/execute_script.py +170 -0
  45. jarvis/jarvis_tools/file_analyzer.py +90 -239
  46. jarvis/jarvis_tools/file_operation.py +99 -31
  47. jarvis/jarvis_tools/{find_methodolopy.py → find_methodology.py} +2 -1
  48. jarvis/jarvis_tools/lsp_get_diagnostics.py +2 -0
  49. jarvis/jarvis_tools/methodology.py +11 -11
  50. jarvis/jarvis_tools/read_code.py +2 -0
  51. jarvis/jarvis_tools/read_webpage.py +33 -196
  52. jarvis/jarvis_tools/registry.py +68 -131
  53. jarvis/jarvis_tools/search_web.py +14 -6
  54. jarvis/jarvis_tools/virtual_tty.py +399 -0
  55. jarvis/jarvis_utils/config.py +29 -3
  56. jarvis/jarvis_utils/embedding.py +0 -317
  57. jarvis/jarvis_utils/file_processors.py +343 -0
  58. jarvis/jarvis_utils/input.py +0 -1
  59. jarvis/jarvis_utils/methodology.py +94 -435
  60. jarvis/jarvis_utils/utils.py +207 -9
  61. {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/METADATA +4 -4
  62. jarvis_ai_assistant-0.1.141.dist-info/RECORD +94 -0
  63. {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/entry_points.txt +4 -4
  64. jarvis/jarvis_code_agent/file_select.py +0 -202
  65. jarvis/jarvis_platform/ai8.py +0 -268
  66. jarvis/jarvis_platform/ollama.py +0 -137
  67. jarvis/jarvis_platform/oyi.py +0 -307
  68. jarvis/jarvis_rag/file_processors.py +0 -138
  69. jarvis/jarvis_rag/main.py +0 -1734
  70. jarvis/jarvis_tools/code_review.py +0 -333
  71. jarvis/jarvis_tools/execute_python_script.py +0 -58
  72. jarvis/jarvis_tools/execute_shell.py +0 -97
  73. jarvis/jarvis_tools/execute_shell_script.py +0 -58
  74. jarvis/jarvis_tools/find_caller.py +0 -278
  75. jarvis/jarvis_tools/find_symbol.py +0 -295
  76. jarvis/jarvis_tools/function_analyzer.py +0 -331
  77. jarvis/jarvis_tools/git_commiter.py +0 -167
  78. jarvis/jarvis_tools/project_analyzer.py +0 -304
  79. jarvis/jarvis_tools/rag.py +0 -143
  80. jarvis/jarvis_tools/tool_generator.py +0 -221
  81. jarvis_ai_assistant-0.1.138.dist-info/RECORD +0 -85
  82. /jarvis/{jarvis_rag → jarvis_init}/__init__.py +0 -0
  83. {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/LICENSE +0 -0
  84. {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/WHEEL +0 -0
  85. {jarvis_ai_assistant-0.1.138.dist-info → jarvis_ai_assistant-0.1.141.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,23 @@
1
1
  from typing import Dict, Any
2
2
  import os
3
+ from pathlib import Path
3
4
 
4
5
  from yaspin import yaspin
5
6
 
6
7
  from jarvis.jarvis_utils.globals import add_read_file_record
7
8
  from jarvis.jarvis_utils.output import OutputType, PrettyOutput
9
+ # 导入文件处理器
10
+ from jarvis.jarvis_utils.file_processors import (
11
+ TextFileProcessor, PDFProcessor, DocxProcessor,
12
+ PPTProcessor, ExcelProcessor
13
+ )
8
14
 
9
15
 
10
16
 
11
17
  class FileOperationTool:
12
18
  name = "file_operation"
13
- description = "文件批量操作工具,可批量读写多个文件,适用于需要同时处理多个文件的场景(读取配置文件、保存生成内容等),不提供代码分析功能"
19
+ description = "文件批量操作工具,可批量读写多个文件,支持文本、PDF、Word、Excel、PPT等格式,适用于需要同时处理多个文件的场景(读取配置文件、保存生成内容等)"
20
+ labels = ['file', 'io', 'batch']
14
21
  parameters = {
15
22
  "type": "object",
16
23
  "properties": {
@@ -35,12 +42,29 @@ class FileOperationTool:
35
42
  "required": ["operation", "files"]
36
43
  }
37
44
 
45
+ def _get_file_processor(self, file_path: str):
46
+ """获取适合处理指定文件的处理器"""
47
+ processors = [
48
+ PDFProcessor, # PDF文件处理器
49
+ DocxProcessor, # Word文档处理器
50
+ PPTProcessor, # PowerPoint演示文稿处理器
51
+ ExcelProcessor, # Excel表格处理器
52
+ TextFileProcessor # 文本文件处理器(放在最后作为兜底)
53
+ ]
54
+
55
+ for processor in processors:
56
+ if processor.can_handle(file_path):
57
+ return processor
58
+
59
+ return None # 如果没有合适的处理器,返回None
60
+
38
61
  def _handle_single_file(self, operation: str, filepath: str, content: str = "",
39
62
  start_line: int = 1, end_line: int = -1) -> Dict[str, Any]:
40
63
  """Handle operations for a single file"""
41
64
  try:
42
65
  abs_path = os.path.abspath(filepath)
43
66
  add_read_file_record(abs_path)
67
+
44
68
  if operation == "read":
45
69
  with yaspin(text=f"正在读取文件: {abs_path}...", color="cyan") as spinner:
46
70
  if not os.path.exists(abs_path):
@@ -50,38 +74,82 @@ class FileOperationTool:
50
74
  "stderr": f"文件不存在: {abs_path}"
51
75
  }
52
76
 
53
- if os.path.getsize(abs_path) > 10 * 1024 * 1024: # 10MB
77
+ # 检查文件大小
78
+ if os.path.getsize(abs_path) > 30 * 1024 * 1024: # 30MB
54
79
  return {
55
80
  "success": False,
56
81
  "stdout": "",
57
- "stderr": "File too large (>10MB)"
82
+ "stderr": "文件过大 (>30MB),无法处理"
58
83
  }
59
-
60
- with open(abs_path, 'r', encoding='utf-8', errors="ignore") as f:
61
- lines = f.readlines()
62
-
63
-
64
- total_lines = len(lines)
65
- start_line = start_line if start_line >= 0 else total_lines + start_line + 1
66
- end_line = end_line if end_line >= 0 else total_lines + end_line + 1
67
- start_line = max(1, min(start_line, total_lines))
68
- end_line = max(1, min(end_line, total_lines))
69
- if end_line == -1:
70
- end_line = total_lines
71
-
72
- if start_line > end_line:
73
- spinner.text = "无效的行范围"
74
- spinner.fail("❌")
75
- error_msg = f"无效的行范围 [{start_line, end_line}] (文件总行数: {total_lines})"
84
+
85
+ file_extension = Path(abs_path).suffix.lower()
86
+
87
+ # 获取文件处理器
88
+ processor = self._get_file_processor(abs_path)
89
+
90
+ if processor is None:
76
91
  return {
77
92
  "success": False,
78
93
  "stdout": "",
79
- "stderr": error_msg
94
+ "stderr": f"不支持的文件类型: {file_extension}"
80
95
  }
81
-
82
- content = "".join(lines[start_line - 1:end_line])
83
- output = f"\n文件: {abs_path}\n行: [{start_line}-{end_line}]\n{content}" + "\n\n"
84
-
96
+
97
+ # 特殊处理纯文本文件,支持行范围选择
98
+ if processor == TextFileProcessor:
99
+ try:
100
+ with open(abs_path, 'r', encoding='utf-8', errors="ignore") as f:
101
+ lines = f.readlines()
102
+
103
+ total_lines = len(lines)
104
+ start_line = start_line if start_line >= 0 else total_lines + start_line + 1
105
+ end_line = end_line if end_line >= 0 else total_lines + end_line + 1
106
+ start_line = max(1, min(start_line, total_lines))
107
+ end_line = max(1, min(end_line, total_lines))
108
+ if end_line == -1:
109
+ end_line = total_lines
110
+
111
+ if start_line > end_line:
112
+ spinner.text = "无效的行范围"
113
+ spinner.fail("❌")
114
+ error_msg = f"无效的行范围 [{start_line, end_line}] (文件总行数: {total_lines})"
115
+ return {
116
+ "success": False,
117
+ "stdout": "",
118
+ "stderr": error_msg
119
+ }
120
+
121
+ content = "".join(lines[start_line - 1:end_line])
122
+ file_info = f"\n文件: {abs_path} (文本文件)\n行: [{start_line}-{end_line}]/{total_lines}"
123
+ except Exception as e:
124
+ return {
125
+ "success": False,
126
+ "stdout": "",
127
+ "stderr": f"读取文本文件失败: {str(e)}"
128
+ }
129
+ else:
130
+ # 使用专用处理器来提取非文本文件的内容
131
+ try:
132
+ spinner.text = f"使用 {processor.__name__} 提取 {abs_path} 的内容..."
133
+ content = processor.extract_text(abs_path)
134
+ # 获取文件类型友好名称
135
+ file_type_names = {
136
+ PDFProcessor: "PDF文档",
137
+ DocxProcessor: "Word文档",
138
+ PPTProcessor: "PowerPoint演示文稿",
139
+ ExcelProcessor: "Excel表格"
140
+ }
141
+ file_type = file_type_names.get(processor, file_extension)
142
+ file_info = f"\n文件: {abs_path} ({file_type})"
143
+ except Exception as e:
144
+ return {
145
+ "success": False,
146
+ "stdout": "",
147
+ "stderr": f"提取 {file_extension} 文件内容失败: {str(e)}"
148
+ }
149
+
150
+ # 构建输出信息
151
+ output = f"{file_info}\n{content}" + "\n\n"
152
+
85
153
  spinner.text = f"文件读取完成: {abs_path}"
86
154
  spinner.ok("✅")
87
155
  return {
@@ -98,13 +166,13 @@ class FileOperationTool:
98
166
  spinner.ok("✅")
99
167
  return {
100
168
  "success": True,
101
- "stdout": f"Successfully wrote content to {abs_path}",
169
+ "stdout": f"文件写入成功: {abs_path}",
102
170
  "stderr": ""
103
171
  }
104
172
  return {
105
173
  "success": False,
106
174
  "stdout": "",
107
- "stderr": f"Unknown operation: {operation}"
175
+ "stderr": f"未知操作: {operation}"
108
176
  }
109
177
 
110
178
  except Exception as e:
@@ -112,7 +180,7 @@ class FileOperationTool:
112
180
  return {
113
181
  "success": False,
114
182
  "stdout": "",
115
- "stderr": f"File operation failed for {abs_path}: {str(e)}"
183
+ "stderr": f"文件操作失败 {abs_path}: {str(e)}"
116
184
  }
117
185
 
118
186
  def execute(self, args: Dict) -> Dict[str, Any]:
@@ -134,7 +202,7 @@ class FileOperationTool:
134
202
  return {
135
203
  "success": False,
136
204
  "stdout": "",
137
- "stderr": "files parameter is required and must be a list"
205
+ "stderr": "files参数是必需的,且必须是一个列表"
138
206
  }
139
207
 
140
208
  all_outputs = []
@@ -156,7 +224,7 @@ class FileOperationTool:
156
224
  if result["success"]:
157
225
  all_outputs.append(result["stdout"])
158
226
  else:
159
- all_outputs.append(f"Error with {file_info['path']}: {result['stderr']}")
227
+ all_outputs.append(f"处理文件 {file_info['path']} 时出错: {result['stderr']}")
160
228
  success = success and result["success"]
161
229
 
162
230
  # Combine all outputs with separators
@@ -173,5 +241,5 @@ class FileOperationTool:
173
241
  return {
174
242
  "success": False,
175
243
  "stdout": "",
176
- "stderr": f"File operation failed: {str(e)}"
244
+ "stderr": f"文件操作失败: {str(e)}"
177
245
  }
@@ -7,6 +7,7 @@ from jarvis.jarvis_utils.methodology import load_methodology
7
7
  class FindMethodologyTool:
8
8
  name = "find_methodology"
9
9
  description = "方法论查找工具,用于在执行过程中查看历史方法论辅助决策"
10
+ labels = ['methodology', 'search', 'analysis']
10
11
  parameters = {
11
12
  "type": "object",
12
13
  "properties": {
@@ -18,7 +19,7 @@ class FindMethodologyTool:
18
19
  "required": ["query"]
19
20
  }
20
21
 
21
- def execute(self, args: Dict) -> Dict[str, Any]:
22
+ def execute(self, args: Dict[str, Any]) -> Dict[str, Any]:
22
23
  """执行方法论查找操作
23
24
 
24
25
  Args:
@@ -8,6 +8,8 @@ class LSPGetDiagnosticsTool:
8
8
  name = "lsp_get_diagnostics"
9
9
  # 工具描述
10
10
  description = "Get diagnostic information (errors, warnings) from code files"
11
+ # 工具标签
12
+ labels = ['code', 'analysis', 'lsp']
11
13
  # 工具参数定义
12
14
  parameters = {
13
15
  "file_path": "Path to the file to analyze",
@@ -1,8 +1,7 @@
1
1
  import os
2
2
  import json
3
- import glob
4
3
  import hashlib
5
- from typing import Dict, Optional, Any
4
+ from typing import Dict, Any
6
5
 
7
6
  from jarvis.jarvis_utils.output import OutputType, PrettyOutput
8
7
 
@@ -13,6 +12,7 @@ class MethodologyTool:
13
12
 
14
13
  name = "methodology"
15
14
  description = "管理问题解决方法论,支持添加、更新和删除操作"
15
+ labels = ['analysis', 'planning']
16
16
  parameters = {
17
17
  "type": "object",
18
18
  "properties": {
@@ -23,7 +23,7 @@ class MethodologyTool:
23
23
  },
24
24
  "problem_type": {
25
25
  "type": "string",
26
- "description": "问题类型,例如:code_review, bug_fix 等"
26
+ "description": "问题类型,例如:部署开源项目、生成提交信息"
27
27
  },
28
28
  "content": {
29
29
  "type": "string",
@@ -82,7 +82,7 @@ class MethodologyTool:
82
82
  return {
83
83
  "success": False,
84
84
  "stdout": "",
85
- "stderr": "Missing required parameters: operation and problem_type"
85
+ "stderr": "缺少必要参数: operationproblem_type"
86
86
  }
87
87
 
88
88
  try:
@@ -95,14 +95,14 @@ class MethodologyTool:
95
95
  os.remove(file_path)
96
96
  return {
97
97
  "success": True,
98
- "stdout": f"Deleted methodology for problem type '{problem_type}'",
98
+ "stdout": f"已删除问题类型'{problem_type}'对应的方法论",
99
99
  "stderr": ""
100
100
  }
101
101
  else:
102
102
  return {
103
103
  "success": False,
104
104
  "stdout": "",
105
- "stderr": f"Methodology for problem type '{problem_type}' not found"
105
+ "stderr": f"未找到问题类型'{problem_type}'对应的方法论"
106
106
  }
107
107
 
108
108
  elif operation in ["update", "add"]:
@@ -110,7 +110,7 @@ class MethodologyTool:
110
110
  return {
111
111
  "success": False,
112
112
  "stdout": "",
113
- "stderr": "Need to provide methodology content"
113
+ "stderr": "需要提供方法论内容"
114
114
  }
115
115
 
116
116
  # 确保目录存在
@@ -128,10 +128,10 @@ class MethodologyTool:
128
128
 
129
129
  PrettyOutput.print(f"方法论已保存到 {file_path}", OutputType.INFO)
130
130
 
131
- action = "Updated" if os.path.exists(file_path) else "Added"
131
+ action = "更新" if os.path.exists(file_path) else "添加"
132
132
  return {
133
133
  "success": True,
134
- "stdout": f"{action} methodology for problem type '{problem_type}'",
134
+ "stdout": f"{action}了问题类型'{problem_type}'对应的方法论",
135
135
  "stderr": ""
136
136
  }
137
137
 
@@ -139,13 +139,13 @@ class MethodologyTool:
139
139
  return {
140
140
  "success": False,
141
141
  "stdout": "",
142
- "stderr": f"Unsupported operation type: {operation}"
142
+ "stderr": f"不支持的操作类型: {operation}"
143
143
  }
144
144
 
145
145
  except Exception as e:
146
146
  return {
147
147
  "success": False,
148
148
  "stdout": "",
149
- "stderr": f"Execution failed: {str(e)}"
149
+ "stderr": f"执行失败: {str(e)}"
150
150
  }
151
151
 
@@ -10,6 +10,8 @@ from jarvis.jarvis_utils.output import OutputType, PrettyOutput
10
10
  class ReadCodeTool:
11
11
  name = "read_code"
12
12
  description = "代码阅读与分析工具,用于读取源代码文件并添加行号,针对代码文件优化,提供更好的格式化输出和行号显示,适用于代码分析、审查和理解代码实现的场景"
13
+ # 工具标签
14
+ labels = ['code', 'analysis', 'file']
13
15
  parameters = {
14
16
  "type": "object",
15
17
  "properties": {
@@ -1,14 +1,12 @@
1
1
  from typing import Dict, Any
2
- from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
3
- from bs4 import BeautifulSoup, Tag
4
- from urllib.parse import urlparse, urljoin
5
- import re
6
-
2
+ import os
3
+ from jarvis.jarvis_platform.registry import PlatformRegistry
7
4
  from jarvis.jarvis_utils.output import OutputType, PrettyOutput
8
5
 
9
6
  class WebpageTool:
10
7
  name = "read_webpage"
11
8
  description = "读取网页内容,提取标题、文本和超链接"
9
+ labels = ['web', 'scraping']
12
10
  parameters = {
13
11
  "type": "object",
14
12
  "properties": {
@@ -20,79 +18,41 @@ class WebpageTool:
20
18
  "required": ["url"]
21
19
  }
22
20
 
23
- def execute(self, args: Dict) -> Dict[str, Any]:
24
- """Read webpage content using Playwright to handle JavaScript-rendered pages"""
21
+ def __init__(self):
22
+ if os.getenv("YUANBAO_COOKIES", "") != "" and os.getenv("YUANBAO_AGENT_ID", "") != "":
23
+ self.platform = "yuanbao"
24
+ self.model = "deep_seek"
25
+ elif os.getenv("KIMI_API_KEY", "") != "":
26
+ self.platform = "kimi"
27
+ self.model = "k1"
28
+ else:
29
+ self.platform = ""
30
+
31
+ @staticmethod
32
+ def check() -> bool:
33
+ return os.getenv("YUANBAO_COOKIES", "") != "" and os.getenv("YUANBAO_AGENT_ID", "") != "" or os.getenv("KIMI_API_KEY", "") != ""
34
+
35
+ def execute(self, args: Dict[str, Any]) -> Dict[str, Any]:
36
+ """Read webpage content using Yuanbao model"""
25
37
  try:
26
38
  url = args["url"].strip()
39
+
40
+ # Create Yuanbao model instance
41
+ model = PlatformRegistry().create_platform(self.platform)
42
+ model.set_suppress_output(False) # type: ignore
43
+ model.set_model_name(self.model) # type: ignore
27
44
 
28
- with sync_playwright() as p:
29
- # Launch browser
30
- browser = p.chromium.launch(
31
- headless=True,
32
- args=['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage']
33
- )
34
-
35
- # Create a new page with appropriate settings
36
- page = browser.new_page(
37
- user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
38
- viewport={'width': 1920, 'height': 1080}
39
- )
40
-
41
- # Set timeout to avoid long waits
42
- page.set_default_timeout(30000) # 30 seconds
43
-
44
- try:
45
- # Navigate to URL and wait for page to load
46
- response = page.goto(url, wait_until="domcontentloaded")
47
-
48
- # Additional wait for network to be idle (with a timeout)
49
- try:
50
- page.wait_for_load_state("networkidle", timeout=10000)
51
- except PlaywrightTimeoutError:
52
- # Continue even if network doesn't become completely idle
53
- pass
54
-
55
- # Make sure we got a valid response
56
- if not response or response.status >= 400:
57
- raise Exception(f"Failed to load page: HTTP {response.status if response else 'No response'}")
58
-
59
- # Get page title safely
60
- title = "No title"
61
- try:
62
- title = page.title()
63
- except Exception:
64
- # Try to extract title from content if direct method fails
65
- try:
66
- title_element = page.query_selector("title")
67
- if title_element:
68
- title = title_element.text_content() or "No title"
69
- except Exception:
70
- pass
71
-
72
- # Get the HTML content after JavaScript execution
73
- html_content = page.content()
45
+ # Construct prompt for webpage reading
46
+ prompt = f"请帮我读取并总结这个网页的内容:{url}\n请以markdown格式输出,包含标题和主要内容。"
74
47
 
75
- except Exception as e:
76
- raise Exception(f"Error navigating to page: {str(e)}")
77
- finally:
78
- # Always close browser
79
- browser.close()
48
+ # Get response from Yuanbao model
49
+ response = model.chat_until_success(prompt) # type: ignore
80
50
 
81
- # Parse with BeautifulSoup and convert to markdown
82
- markdown_content = self._html_to_markdown(html_content, url)
83
-
84
- # Build output in markdown format
85
- output = [
86
- f"# {title}",
87
- f"Url: {url}",
88
- markdown_content
89
- ]
90
-
91
- return {
92
- "success": True,
93
- "stdout": "\n".join(output),
94
- "stderr": ""
95
- }
51
+ return {
52
+ "success": True,
53
+ "stdout": response,
54
+ "stderr": ""
55
+ }
96
56
 
97
57
  except Exception as e:
98
58
  PrettyOutput.print(f"读取网页失败: {str(e)}", OutputType.ERROR)
@@ -101,126 +61,3 @@ class WebpageTool:
101
61
  "stdout": "",
102
62
  "stderr": f"Failed to parse webpage: {str(e)}"
103
63
  }
104
-
105
- def _create_soup_element(self, content):
106
- """Safely create a BeautifulSoup element, ensuring it's treated as markup"""
107
- if isinstance(content, str):
108
- # Create a wrapper tag to ensure proper parsing
109
- soup_div = BeautifulSoup(f"<div>{content}</div>", 'html.parser').div
110
- if soup_div is not None:
111
- return soup_div.contents
112
- # Return an empty list if the div is None
113
- return []
114
- return content
115
-
116
- def _html_to_markdown(self, html_content: str, base_url: str) -> str:
117
- """Convert HTML to Markdown format preserving the content structure"""
118
- soup = BeautifulSoup(html_content, 'html.parser')
119
-
120
- # Remove unwanted elements
121
- for element in soup(['script', 'style', 'meta', 'noscript', 'head']):
122
- element.decompose()
123
-
124
- # Process headings
125
- for level in range(1, 7):
126
- for heading in soup.find_all(f'h{level}'):
127
- text = heading.get_text().strip()
128
- heading_md = "\n\n" + "#" * level + " " + text + "\n\n"
129
- new_element = self._create_soup_element(heading_md)
130
- heading.replace_with(*new_element)
131
-
132
- # Process paragraphs
133
- for p in soup.find_all('p'):
134
- text = p.get_text().strip()
135
- if text:
136
- new_element = self._create_soup_element("\n\n" + text + "\n\n")
137
- p.replace_with(*new_element)
138
-
139
- # Process unordered lists
140
- for ul in soup.find_all('ul'):
141
- items = []
142
- for li in ul.find_all('li', recursive=False):
143
- items.append("* " + li.get_text().strip())
144
- new_element = self._create_soup_element("\n\n" + "\n".join(items) + "\n\n")
145
- ul.replace_with(*new_element)
146
-
147
- # Process ordered lists
148
- for ol in soup.find_all('ol'):
149
- items = []
150
- for i, li in enumerate(ol.find_all('li', recursive=False), 1):
151
- items.append(str(i) + ". " + li.get_text().strip())
152
- new_element = self._create_soup_element("\n\n" + "\n".join(items) + "\n\n")
153
- ol.replace_with(*new_element)
154
-
155
- # Process links (first pass)
156
- for a in soup.find_all('a', href=True):
157
- try:
158
- href = a['href']
159
- text = a.get_text().strip()
160
- if text and href:
161
- # Convert relative URLs to absolute
162
- if href.startswith('/') and not href.startswith('//'):
163
- href = urljoin(base_url, href)
164
- link_md = "[" + text + "](" + href + ")"
165
- new_element = self._create_soup_element(link_md)
166
- a.replace_with(*new_element)
167
- except (KeyError, AttributeError):
168
- continue
169
-
170
- # Process images
171
- for img in soup.find_all('img', src=True):
172
- try:
173
- src = img['src']
174
- alt = img.get('alt', 'Image').strip()
175
- # Convert relative URLs to absolute
176
- if src.startswith('/') and not src.startswith('//'):
177
- src = urljoin(base_url, src)
178
- img_md = "![" + alt + "](" + src + ")"
179
- new_element = self._create_soup_element(img_md)
180
- img.replace_with(*new_element)
181
- except (KeyError, AttributeError, UnboundLocalError):
182
- continue
183
-
184
- # Process code blocks
185
- for pre in soup.find_all('pre'):
186
- code = pre.get_text().strip()
187
- pre_md = "\n\n```\n" + code + "\n```\n\n"
188
- new_element = self._create_soup_element(pre_md)
189
- pre.replace_with(*new_element)
190
-
191
- # Process inline code
192
- for code in soup.find_all('code'):
193
- text = code.get_text().strip()
194
- code_md = "`" + text + "`"
195
- new_element = self._create_soup_element(code_md)
196
- code.replace_with(*new_element)
197
-
198
- # Process line breaks
199
- for br in soup.find_all('br'):
200
- new_element = self._create_soup_element('\n')
201
- br.replace_with(*new_element)
202
-
203
- # Get the full text
204
- markdown_text = soup.get_text()
205
-
206
- # Clean up extra whitespace and line breaks
207
- markdown_text = re.sub(r'\n{3,}', '\n\n', markdown_text)
208
- markdown_text = re.sub(r'\s{2,}', ' ', markdown_text)
209
-
210
- # Process links again (for any that might have been missed)
211
- link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
212
- all_links = re.findall(link_pattern, markdown_text)
213
-
214
- # Add a section with all links at the end
215
- if all_links:
216
- link_section = ["", "## Links", ""]
217
- seen_links = set()
218
- for text, href in all_links:
219
- link_entry = "[" + text + "](" + href + ")"
220
- if link_entry not in seen_links:
221
- link_section.append(link_entry)
222
- seen_links.add(link_entry)
223
-
224
- markdown_text += "\n\n" + "\n".join(link_section)
225
-
226
- return markdown_text.strip()