beswarm 0.1.52__py3-none-any.whl → 0.1.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- beswarm/aient/setup.py +1 -1
- beswarm/aient/src/aient/models/chatgpt.py +21 -4
- beswarm/aient/src/aient/plugins/list_directory.py +5 -5
- beswarm/aient/src/aient/plugins/read_image.py +54 -0
- beswarm/aient/src/aient/plugins/websearch.py +3 -3
- beswarm/aient/src/aient/prompt/agent.py +2 -0
- beswarm/tools/UIworker.py +3 -1
- beswarm/tools/__init__.py +4 -0
- beswarm/tools/search_web.py +296 -0
- beswarm/tools/worker.py +23 -16
- {beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/METADATA +1 -1
- {beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/RECORD +14 -12
- {beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/WHEEL +0 -0
- {beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/top_level.txt +0 -0
beswarm/aient/setup.py
CHANGED
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="aient",
-    version="1.1.
+    version="1.1.10",
    description="Aient: The Awakening of Agent.",
    long_description=Path.open(Path("README.md"), encoding="utf-8").read(),
    long_description_content_type="text/markdown",
beswarm/aient/src/aient/models/chatgpt.py
CHANGED
@@ -152,8 +152,25 @@ class chatgpt(BaseLLM):
         else:
             last_user_message = self.conversation[convo_id][-1]["content"]
             if last_user_message != message:
+                image_message_list = []
+                if isinstance(function_arguments, str):
+                    functions_list = json.loads(function_arguments)
+                else:
+                    functions_list = function_arguments
+                for tool_info in functions_list:
+                    if tool_info.get("base64_image"):
+                        image_message_list.append({"type": "text", "text": safe_get(tool_info, "parameter", "image_path", default="") + " image:"})
+                        image_message_list.append({
+                            "type": "image_url",
+                            "image_url": {
+                                "url": tool_info["base64_image"],
+                            }
+                        })
                 self.conversation[convo_id].append({"role": "assistant", "content": convert_functions_to_xml(function_arguments)})
-
+                if image_message_list:
+                    self.conversation[convo_id].append({"role": "user", "content": [{"type": "text", "text": message}] + image_message_list})
+                else:
+                    self.conversation[convo_id].append({"role": "user", "content": message})
             else:
                 self.conversation[convo_id].append({"role": "assistant", "content": "我已经执行过这个工具了,接下来我需要做什么?"})
 
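The hunk above is what turns a read_image tool result into a multimodal turn: any executed tool that attached a base64_image is rendered as an image_url content part alongside the plain-text message. A minimal sketch of the message this produces, with an invented tool list and data URI standing in for function_arguments and the real tool output (plain dict access stands in for aient's safe_get helper):

```python
# Sketch only: the tool list and data URI are invented; mirrors the hunk above.
functions_list = [
    {"name": "read_image",
     "parameter": {"image_path": "docs/diagram.png"},          # hypothetical path
     "base64_image": "data:image/png;base64,iVBORw0KGgo..."}   # hypothetical data
]

image_message_list = []
for tool_info in functions_list:
    if tool_info.get("base64_image"):
        # .get chain stands in for safe_get(tool_info, "parameter", "image_path", default="")
        image_path = tool_info.get("parameter", {}).get("image_path", "")
        image_message_list.append({"type": "text", "text": image_path + " image:"})
        image_message_list.append({"type": "image_url",
                                   "image_url": {"url": tool_info["base64_image"]}})

message = "tool results"  # hypothetical user text
user_turn = {"role": "user",
             "content": [{"type": "text", "text": message}] + image_message_list}
print(user_turn["content"][2]["type"])  # -> image_url
```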
@@ -545,10 +562,10 @@ class chatgpt(BaseLLM):
                         self.latest_file_content[tool_info['parameter']["file_path"]] = tool_response
                         all_responses.append(f"[{tool_name}({tool_args}) Result]:\n\nRead file successfully! The file content has been updated in the tag <latest_file_content>.")
                     elif tool_name == "write_to_file":
-                        # change_tools_args = copy.deepcopy(tool_info['parameter'])
-                        # change_tools_args["content"] = "...文件已写入,内容已省略以节省上下文..."
-                        # tool_args = json.dumps(change_tools_args, ensure_ascii=False) if not isinstance(change_tools_args, str) else change_tools_args
                         all_responses.append(f"[{tool_name} Result]:\n\n{tool_response}")
+                    elif tool_name == "read_image":
+                        tool_info["base64_image"] = tool_response
+                        all_responses.append(f"[{tool_name}({tool_args}) Result]:\n\nRead image successfully!")
                     else:
                         all_responses.append(f"[{tool_name}({tool_args}) Result]:\n\n{tool_response}")
 
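The read_image branch deliberately keeps the potentially huge data URI out of the textual tool log: the response is stashed on tool_info["base64_image"], where the first hunk above later picks it up, and all_responses only records a success line. A small sketch with stand-in values for the method's locals:

```python
# Stand-ins for the surrounding method's variables; all values are invented.
tool_name = "read_image"
tool_args = '{"image_path": "docs/diagram.png"}'         # hypothetical
tool_response = "data:image/png;base64,iVBORw0KGgo..."   # hypothetical
tool_info = {"parameter": {"image_path": "docs/diagram.png"}}
all_responses = []

if tool_name == "read_image":
    # Attach the data URI for the message-building step; log only a success line.
    tool_info["base64_image"] = tool_response
    all_responses.append(f"[{tool_name}({tool_args}) Result]:\n\nRead image successfully!")

print(all_responses[0].splitlines()[-1])  # -> Read image successfully!
```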
beswarm/aient/src/aient/plugins/list_directory.py
CHANGED
@@ -5,13 +5,13 @@ from .registry import register_tool
 @register_tool()
 def list_directory(path="."):
     """
-
+    列出指定目录中的所有文件和子目录
 
-
-
+    参数:
+    path: 要列出内容的目录路径,默认为当前目录
 
-
-
+    返回:
+    目录内容的列表字符串
     """
     try:
         # 获取目录内容
beswarm/aient/src/aient/plugins/read_image.py
ADDED
@@ -0,0 +1,54 @@
+import os
+import base64
+import mimetypes
+from .registry import register_tool
+
+@register_tool()
+def read_image(image_path: str):
+    """
+    读取本地图片文件,将其转换为 Base64 编码,并返回包含 MIME 类型和完整数据的字符串。
+    此工具用于将图片内容加载到上下文中。
+
+    参数:
+        image_path (str): 本地图片文件的路径。
+
+    返回:
+        str: 成功时返回包含图片MIME类型和Base64编码数据的格式化字符串。
+             失败时返回错误信息字符串。
+    """
+    try:
+        # 检查路径是否存在
+        if not os.path.exists(image_path):
+            return f"错误: 图片路径 '{image_path}' 不存在。"
+        # 检查是否为文件
+        if not os.path.isfile(image_path):
+            return f"错误: 路径 '{image_path}' 不是一个有效的文件 (可能是一个目录)。"
+
+        # 尝试猜测MIME类型
+        mime_type, _ = mimetypes.guess_type(image_path) # encoding 变量通常不需要
+
+        if not mime_type or not mime_type.startswith('image/'):
+            # 如果mimetypes无法识别,或者不是图片类型
+            return f"错误: 文件 '{image_path}' 的MIME类型无法识别为图片 (检测到: {mime_type})。请确保文件是常见的图片格式 (e.g., PNG, JPG, GIF, WEBP)。"
+
+        with open(image_path, "rb") as image_file:
+            image_data = image_file.read()
+
+        base64_encoded_data = base64.b64encode(image_data).decode('utf-8')
+
+        # 返回一个描述性字符串,模仿 list_directory.py 的风格
+        # 包含完整的 Base64 数据
+        # 注意:对于非常大的图片,这可能会产生非常长的输出字符串。
+        # return f"成功读取图片 '{image_path}':\n MIME 类型: {mime_type}\n Base64 数据: {base64_encoded_data}"
+        return f"data:{mime_type};base64," + base64_encoded_data
+
+    except FileNotFoundError:
+        # 这个异常通常由 open() 抛出,如果 os.path.exists 通过但文件在读取前被删除
+        # 或者路径检查逻辑未能完全覆盖所有情况 (理论上不应发生)
+        return f"错误: 图片路径 '{image_path}' 未找到 (可能在检查后被删除或移动)。"
+    except PermissionError:
+        return f"错误: 没有权限访问图片路径 '{image_path}'。"
+    except IOError as e: # 例如文件损坏无法读取,或磁盘问题
+        return f"错误: 读取图片 '{image_path}' 时发生 I/O 错误: {e}"
+    except Exception as e:
+        return f"读取图片 '{image_path}' 时发生未知错误: {e}"
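A minimal usage sketch for the new plugin, using the same import path the tools package uses below; the image path is an example only:

```python
# Hedged example: assumes a readable PNG exists at the (hypothetical) path.
from beswarm.aient.src.aient.plugins import read_image

result = read_image("docs/diagram.png")
if result.startswith("data:image/"):
    print("data URI, length:", len(result))  # success: base64 data URI
else:
    print(result)                            # failure: Chinese-language error string
```

On success the return value is a data URI (`data:<mime>;base64,...`), which is exactly the shape the chatgpt.py hunks above expect for an image_url part.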
beswarm/aient/src/aient/plugins/websearch.py
CHANGED
@@ -124,10 +124,10 @@ def jina_ai_Web_crawler(url: str, isSearch=False) -> str:
 @register_tool()
 def get_url_content(url: str) -> str:
     """
-
+    获取 url 的网页内容,以 markdown 格式返回给用户
 
-
-
+    :param url: 要爬取的网页URL
+    :return: 网页内容
     """
     markdown_content = url_to_markdown(url)
     # print(markdown_content)
beswarm/tools/UIworker.py
CHANGED
@@ -107,7 +107,9 @@ async def UIworker(goal, tools, work_dir, cache_messages=None):
 """
         # 让指令agent分析对话历史并生成新指令
         instruction_agent = chatgpt(**instruction_agent_config)
-
+        conversation_history = copy.deepcopy(work_agent.conversation["default"])
+        conversation_history.pop(0)
+        instruction_agent.conversation["default"][1:] = conversation_history
         new_prompt = await get_current_screen_image_message(instruction_prompt)
         next_instruction = await instruction_agent.ask_async(new_prompt)
         print("\n🤖 指令智能体生成的下一步指令:", next_instruction)
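The three added lines seed the instruction agent with the work agent's history while keeping the instruction agent's own system message: pop(0) drops the copied system prompt, and the [1:] slice assignment replaces everything after index 0. A standalone sketch of the list mechanics (message contents invented):

```python
import copy

# Stand-ins for the two agents' conversations; shapes follow the code above.
work_conversation = [
    {"role": "system", "content": "work agent system prompt"},
    {"role": "user", "content": "step 1"},
    {"role": "assistant", "content": "did step 1"},
]
instruction_conversation = [{"role": "system", "content": "instruction agent system prompt"}]

conversation_history = copy.deepcopy(work_conversation)
conversation_history.pop(0)                          # drop the work agent's system message
instruction_conversation[1:] = conversation_history  # keep index 0, replace the rest

print([m["role"] for m in instruction_conversation])
# -> ['system', 'user', 'assistant']
```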
beswarm/tools/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from .UIworker import UIworker
 from .search_arxiv import search_arxiv
 from .repomap import get_code_repo_map
 from .click import find_and_click_element, scroll_screen
+from .search_web import search_web
 #显式导入 aient.plugins 中的所需内容
 from ..aient.src.aient.plugins import (
     excute_command,
@@ -18,6 +19,7 @@ from ..aient.src.aient.plugins import (
     write_to_file,
     download_read_arxiv_pdf,
     get_url_content,
+    read_image,
     register_tool,
 )
 
@@ -29,6 +31,7 @@ __all__ = [
     "get_code_repo_map",
     # aient.plugins
     "excute_command",
+    "read_image",
     "get_time",
     "generate_image",
     "list_directory",
@@ -42,4 +45,5 @@ __all__ = [
     "scroll_screen",
     "register_tool",
     "UIworker",
+    "search_web",
 ]
beswarm/tools/search_web.py
ADDED
@@ -0,0 +1,296 @@
+import re
+import os
+import json
+import httpx
+import threading
+
+from ..aient.src.aient.plugins import register_tool, get_url_content # Assuming a similar plugin structure
+
+class ThreadWithReturnValue(threading.Thread):
+    def run(self):
+        if self._target is not None:
+            self._return = self._target(*self._args, **self._kwargs)
+
+    def join(self):
+        super().join()
+        return self._return
+
+@register_tool()
+async def search_web(query: str):
+    """
+    获取 Google 搜索结果。
+
+    参数:
+    query (str): 要在 Google 上搜索的查询字符串。
+
+    返回:
+    dict: 包含搜索结果的字典,如果发生错误则包含错误信息。
+    """
+    api_key = os.environ.get('THORDATA_KEY')
+    if not api_key:
+        raise ValueError("THORDATA_KEY is not set in environment variables")
+
+    api_url = "https://scraperapi.thordata.com/request"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}" # 请注意:硬编码的 API 密钥
+    }
+    payload = {
+        "url": f"https://www.google.com/search?q={query}"
+    }
+    results = []
+
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(api_url, headers=headers, json=payload)
+            response.raise_for_status() # 如果状态码是 4xx 或 5xx,则引发 HTTPStatusError
+            results = response.json()
+    except httpx.HTTPStatusError as e:
+        return {
+            "error": f"HTTP error occurred: {e.response.status_code} - {e.response.text}",
+            "status_code": e.response.status_code
+        }
+    except httpx.RequestError as e:
+        return {
+            "error": f"An error occurred while requesting {e.request.url!r}: {e}",
+            "request_url": str(e.request.url)
+        }
+    except json.JSONDecodeError:
+        return {
+            "error": "Failed to decode JSON response from the API.",
+            "response_text": response.text if 'response' in locals() else "No response text available"
+        }
+    except Exception as e:
+        return {
+            "error": f"An unexpected error occurred: {str(e)}"
+        }
+
+    unique_urls = []
+    if "error" in results:
+        print(f"Error fetching search results for '{query}':")
+        print(json.dumps(results, indent=2, ensure_ascii=False))
+    else:
+        # print(f"Search results for '{query}':")
+        html_content = results.get("data", {}).get("result", {}).get("html", "")
+        if html_content:
+            # 使用正则表达式查找所有 URL
+            # 导入 html 和 urllib.parse 模块
+            import html
+            import urllib.parse
+
+            # 1. 初步提取潜在的 URL 字符串
+            # 使用更宽容的正则,允许末尾有非URL字符,后续清理
+            candidate_urls = re.findall(r'https?://[^\s"]+|www\.[^\s"]+', html_content)
+
+            processed_urls = []
+            for url_str in candidate_urls:
+                # 2. 解码十六进制表示 (例如 \x26 -> &)
+                try:
+                    def replace_hex(match):
+                        return chr(int(match.group(1), 16))
+                    url_str = re.sub(r'\\x([0-9a-fA-F]{2})', replace_hex, url_str)
+                except ValueError:
+                    pass
+
+                # 3. 解码 HTML 实体 (例如 &amp; -> &)
+                url_str = html.unescape(url_str)
+
+                # 4. 解码 URL 百分号编码 (例如 %3F -> ?, %3D -> =)
+                url_str = urllib.parse.unquote(url_str)
+
+                # 5. 精确截断已知的非 URL 参数或模式
+                # 截断 ved= 参数
+                if 'ved=' in url_str:
+                    url_str = url_str.split('ved=', 1)[0]
+                    url_str = url_str.rstrip('&?') # 移除可能残留的末尾 & 或 ?
+
+                # 6. 迭代移除末尾的 HTML 标签
+                # 例如 </cite>, <div...>, </span></span>
+                old_url_len = -1
+                while old_url_len != len(url_str): # 循环直到字符串不再变短
+                    old_url_len = len(url_str)
+                    # 移除末尾的完整闭合标签, e.g., </div>
+                    url_str = re.sub(r'</[^>]+>$', '', url_str)
+                    # 移除末尾的开始标签或不完整标签, e.g., <cite or <div
+                    # (包括 < 开头到结尾的所有内容)
+                    url_str = re.sub(r'<[^>]*$', '', url_str)
+                    # 移除末尾的 > 单独字符,如果标签移除后残留
+                    url_str = url_str.rstrip('>')
+
+
+                # 7. 移除末尾的常见非URL字符 (引号,特定标点)
+                # 注意顺序,这个应该在HTML标签移除后
+                url_str = url_str.rstrip('\'";,.?!<>()[]{}') # '<' 也在这里再次检查
+
+                # 8. 移除末尾单独的 '&' 字符 (在所有其他清理之后)
+                url_str = url_str.rstrip('&')
+                url_str = url_str.split("#:~:")[0]
+
+                if url_str: #确保URL不为空
+                    processed_urls.append(url_str)
+
+            # 定义要过滤的域名列表
+            excluded_domains = [
+                "www.w3.org",
+                "www.google.com",
+                "ssl.gstatic.com",
+                "translate.google.com",
+                "www.googleadservices.com",
+                "gstatic.com",
+                "lens.google.com",
+                "schema.org",
+                "id.google.com",
+                "maps.google.com",
+                "clients6.google.com",
+                "ogs.google.com",
+                "policies.google.com",
+                "support.google.com",
+                "tpc.googlesyndication.com",
+                "adssettings.google.com"
+            ]
+
+            final_urls_before_dedup = []
+            for url in processed_urls:
+                if not url:
+                    continue
+                if not any(excluded_domain in url for excluded_domain in excluded_domains):
+                    # 9. 进一步规范化
+                    # 9a. 移除末尾的 /
+                    normalized_url = url.rstrip('/')
+
+                    # 9b. 添加默认协议 (https) 如果缺失
+                    if normalized_url and not normalized_url.startswith(('http://', 'https://')):
+                        normalized_url = 'https://' + normalized_url
+
+                    if normalized_url:
+                        final_urls_before_dedup.append(normalized_url)
+
+            # 10. 去重
+            temp_unique_urls_set = set(final_urls_before_dedup)
+            temp_unique_urls_set.discard("https://baike.baidu.com")
+            temp_unique_urls_set.discard("https://zhuanlan.zhihu.com")
+            unique_urls = sorted(list(temp_unique_urls_set))
+
+    results = unique_urls
+    if not results:
+        return "No search results returned or results list is empty."
+
+    web_contents_raw = []
+    if results and isinstance(results, list) and len(results) > 0:
+        # print(f"Fetching content for {len(results)} URLs...")
+        # threads = []
+        # for url in url_set_list:
+        #     # url_search_thread = ThreadWithReturnValue(target=jina_ai_Web_crawler, args=(url,True,))
+        #     url_search_thread = ThreadWithReturnValue(target=get_url_content, args=(url,))
+        #     # url_search_thread = ThreadWithReturnValue(target=Web_crawler, args=(url,True,))
+        #     url_search_thread.start()
+        #     threads.append(url_search_thread)
+        threads = []
+        for i, link in enumerate(results):
+            print(f"Processing URL {i + 1}/{len(results)}: {link}")
+            # Assuming get_url_content is synchronous and returns a string or None
+            # content_text = get_url_content(link)
+            url_search_thread = ThreadWithReturnValue(target=get_url_content, args=(link,))
+            url_search_thread.start()
+            threads.append(url_search_thread)
+
+        for thread in threads:
+            content_text = thread.join()
+            # content_text = thread.get_result()
+            if content_text and len(content_text.split("\n\n")) > 10: # Ensure content_text is not None or empty before adding
+                web_contents_raw.append({"url": link, "content": str(content_text)}) # Ensure content is string
+            else:
+                print(f"Warning: Failed to get content or content was empty for URL: {link}")
+    elif not results or (isinstance(results, list) and len(results) == 0) :
+        print("No search results returned or results list is empty.")
+    else:
+        print(f"Search results in unexpected format: {type(results)}")
+
+    # print(f"Fetched {len(web_contents_raw)} web contents with text.")
+
+    if not web_contents_raw:
+        return "No web content"
+    # if not web_contents_raw:
+    #     print("No web content with text to process for similarity.")
+    #     output_filename = "web_content_filtered.json"
+    #     with open(output_filename, "w", encoding="utf-8") as f:
+    #         json.dump([], f, indent=2, ensure_ascii=False)
+    #     print(f"Empty list saved to {output_filename}")
+    #     return
+
+    # output_filename = "web_content.json"
+    # with open(output_filename, "w", encoding="utf-8") as f:
+    #     json.dump(web_contents_raw, f, indent=2, ensure_ascii=False)
+
+    n = len(web_contents_raw)
+    to_keep_flags = [True] * n # Flags to mark which items to keep
+
+    # print("Starting similarity comparison...")
+    for i in range(n):
+        if not to_keep_flags[i]: # Skip if item i is already marked for discard
+            continue
+
+        content_i = web_contents_raw[i].get('content', "")
+        if not isinstance(content_i, str):
+            content_i = str(content_i) # Fallback, though str(content_text) above should handle it
+
+        for j in range(i + 1, n):
+            if not to_keep_flags[j]: # Skip if item j is already marked for discard
+                continue
+
+            content_j = web_contents_raw[j].get('content', "")
+            if not isinstance(content_j, str):
+                content_j = str(content_j) # Fallback
+
+            similarity = calculate_similarity(content_i, content_j)
+
+            if similarity > 0.9:
+                # print(f"Similarity > 0.9 ({similarity:.4f}) between content from '{web_contents_raw[i]['url']}' and '{web_contents_raw[j]['url']}'. Discarding the latter.")
+                to_keep_flags[j] = False # Discard the second item (item j)
+
+    final_web_content = [web_contents_raw[i] for i in range(n) if to_keep_flags[i]]
+    # print(f"Number of items after filtering: {len(final_web_content)}")
+
+    # output_filename = "web_content_filtered.json"
+    # with open(output_filename, "w", encoding="utf-8") as f:
+    #     json.dump(final_web_content, f, indent=2, ensure_ascii=False)
+    # print(f"Filtered web content saved to {output_filename}")
+    final_result = ""
+    for item in final_web_content:
+        final_result += item["content"]
+        final_result += "\n\n"
+    if not final_result:
+        return "No web content"
+    return final_result
+
+import difflib
+
+
+def calculate_similarity(string1: str, string2: str) -> float:
+    """Calculates the similarity ratio between two strings.
+
+    Args:
+        string1: The first string.
+        string2: The second string.
+
+    Returns:
+        A float between 0 and 1, where 1 means the strings are identical
+        and 0 means they are completely different.
+    """
+    return difflib.SequenceMatcher(None, string1, string2).ratio()
+
+if __name__ == '__main__':
+    import asyncio
+    import re
+
+    async def main():
+        # 示例用法
+        search_query = "美国"
+        print(f"Performing web search for: '{search_query}'")
+        results = await search_web(search_query) # results is a list of URLs
+
+        print(results)
+
+    asyncio.run(main())
+
+# python -m beswarm.tools.search_web
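A usage sketch for the new tool, assuming THORDATA_KEY is exported and the async plugin is invoked directly (the query string is an example):

```python
import asyncio
import os

from beswarm.tools import search_web  # exported by beswarm/tools/__init__.py above

async def demo():
    if not os.environ.get("THORDATA_KEY"):
        raise SystemExit("export THORDATA_KEY first")
    result = await search_web("python difflib SequenceMatcher")
    # Success: concatenated markdown of the de-duplicated pages.
    # Failure: a short string such as "No web content", or an error dict
    # if the upstream HTTP request itself failed.
    print(result[:500] if isinstance(result, str) else result)

asyncio.run(demo())
```

Note that despite its docstring, search_web ultimately returns a string (the concatenated page contents or a short error message); only the early HTTP/JSON failure paths return a dict.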
beswarm/tools/worker.py
CHANGED
@@ -64,31 +64,38 @@ async def worker(goal, tools, work_dir, cache_messages=None):
     work_agent = chatgpt(**work_agent_config)
     async def instruction_agent_task():
         while True:
-            # 指令agent初始化
-            instruction_agent = chatgpt(**instruction_agent_config)
-
             # 获取工作agent的对话历史
-            conversation_history = copy.deepcopy(work_agent.conversation["default"])
-            conversation_history.pop(0)
-
-            conversation_len = len(conversation_history) - 1
-            message_index = 0
-            while message_index < conversation_len:
-
-
-
-
-
+            # conversation_history = copy.deepcopy(work_agent.conversation["default"])
+            # conversation_history.pop(0)
+
+            # conversation_len = len(conversation_history) - 1
+            # message_index = 0
+            # while message_index < conversation_len:
+            #     if isinstance(conversation_history[message_index]["content"], str) and conversation_history[message_index]["content"].strip() == "":
+            #         conversation_history.pop(message_index)
+            #         conversation_len = conversation_len - 1
+            #     elif isinstance(conversation_history[message_index]["content"], list) and \
+            #         len(conversation_history[message_index]["content"]) > 0 and \
+            #         conversation_history[message_index]["content"][0].get("type") == "text" and \
+            #         conversation_history[message_index]["content"][0].get("text").strip() == "":
+            #         conversation_history.pop(message_index)
+            #         conversation_len = conversation_len - 1
+            #     else:
+            #         message_index = message_index + 1
 
             instruction_prompt = f"""
+</work_agent_conversation_end>
 任务目标: {goal}
 
-
-{conversation_history}
+在 tag <work_agent_conversation_start>...</work_agent_conversation_end> 之前的对话历史都是工作智能体的对话历史。
 
 根据以上对话历史和目标,请生成下一步指令。如果任务已完成,请回复"任务已完成"。
 """
             # 让指令agent分析对话历史并生成新指令
+            instruction_agent = chatgpt(**instruction_agent_config)
+            conversation_history = copy.deepcopy(work_agent.conversation["default"])
+            conversation_history.pop(0)
+            instruction_agent.conversation["default"][1:] = conversation_history
             next_instruction = await instruction_agent.ask_async(instruction_prompt)
             print("\n🤖 指令智能体生成的下一步指令:", next_instruction)
             if "fetch_gpt_response_stream HTTP Error', 'status_code': 404" in next_instruction:
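The net effect of this hunk: the work agent's history is no longer inlined into instruction_prompt via {conversation_history}; it is injected as real conversation messages (the same pop(0)/[1:] seeding as in UIworker.py above), and the prompt now carries only the goal plus a </work_agent_conversation_end> marker telling the model where the injected history ends. An illustrative sketch of what the instruction agent ends up seeing (roles and contents are invented):

```python
# Illustrative only: mirrors the seeding and prompt assembly above.
seeded_conversation = [
    {"role": "system", "content": "instruction agent system prompt"},
    {"role": "user", "content": "step 1"},           # copied work-agent history
    {"role": "assistant", "content": "did step 1"},  # copied work-agent history
]
goal = "ship the feature"  # hypothetical
instruction_prompt = f"""
</work_agent_conversation_end>
任务目标: {goal}

在 tag <work_agent_conversation_start>...</work_agent_conversation_end> 之前的对话历史都是工作智能体的对话历史。

根据以上对话历史和目标,请生成下一步指令。如果任务已完成,请回复"任务已完成"。
"""
# ask_async sends instruction_prompt as the next user turn after the seeded history.
seeded_conversation.append({"role": "user", "content": instruction_prompt})
print(len(seeded_conversation))  # -> 4
```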
{beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 beswarm/__init__.py,sha256=HZjUOJtZR5QhMuDbq-wukQQn1VrBusNWai_ysGo-VVI,20
 beswarm/utils.py,sha256=AdDCcqAIIKQEMl7PfryVgeT9G5sHe7QNsZnrvmTGA8E,283
 beswarm/aient/main.py,sha256=SiYAIgQlLJqYusnTVEJOx1WNkSJKMImhgn5aWjfroxg,3814
-beswarm/aient/setup.py,sha256=
+beswarm/aient/setup.py,sha256=pEuQlRYVBNxsLM_do29MTAS2kOlza6CQYJl_Lii8iYA,487
 beswarm/aient/src/aient/__init__.py,sha256=SRfF7oDVlOOAi6nGKiJIUK6B_arqYLO9iSMp-2IZZps,21
 beswarm/aient/src/aient/core/__init__.py,sha256=NxjebTlku35S4Dzr16rdSqSTWUvvwEeACe8KvHJnjPg,34
 beswarm/aient/src/aient/core/log_config.py,sha256=kz2_yJv1p-o3lUQOwA3qh-LSc3wMHv13iCQclw44W9c,274
@@ -16,7 +16,7 @@ beswarm/aient/src/aient/core/test/test_payload.py,sha256=8jBiJY1uidm1jzL-EiK0s6U
 beswarm/aient/src/aient/models/__init__.py,sha256=ouNDNvoBBpIFrLsk09Q_sq23HR0GbLAKfGLIFmfEuXE,219
 beswarm/aient/src/aient/models/audio.py,sha256=kRd-8-WXzv4vwvsTGwnstK-WR8--vr9CdfCZzu8y9LA,1934
 beswarm/aient/src/aient/models/base.py,sha256=z-Z0pJfTN2x0cuwfvu0BdMRY9O-RmLwHEnBIJN1x4Fg,6719
-beswarm/aient/src/aient/models/chatgpt.py,sha256=
+beswarm/aient/src/aient/models/chatgpt.py,sha256=6SuMfV8n0pBOaKN3WGqhPc53_LHBdyOECQgVsHscso8,46169
 beswarm/aient/src/aient/models/claude.py,sha256=JezghW7y0brl4Y5qiSHvnYR5prQCFywX4RViHt39pGI,26037
 beswarm/aient/src/aient/models/duckduckgo.py,sha256=1l7vYCs9SG5SWPCbcl7q6pCcB5AUF_r-a4l9frz3Ogo,8115
 beswarm/aient/src/aient/models/gemini.py,sha256=chGLc-8G_DAOxr10HPoOhvVFW1RvMgHd6mt--VyAW98,14730
@@ -28,14 +28,15 @@ beswarm/aient/src/aient/plugins/config.py,sha256=Vp6CG9ocdC_FAlCMEGtKj45xamir76D
 beswarm/aient/src/aient/plugins/excute_command.py,sha256=A3WmfZboEikU1EHvtMWhBv-xHxCyMxbDddQ982I_8wE,10482
 beswarm/aient/src/aient/plugins/get_time.py,sha256=Ih5XIW5SDAIhrZ9W4Qe5Hs1k4ieKPUc_LAd6ySNyqZk,654
 beswarm/aient/src/aient/plugins/image.py,sha256=ZElCIaZznE06TN9xW3DrSukS7U3A5_cjk1Jge4NzPxw,2072
-beswarm/aient/src/aient/plugins/list_directory.py,sha256=
+beswarm/aient/src/aient/plugins/list_directory.py,sha256=JZVuImecMSfEv6jLqii-0uQJ1UCsrpMNmYlwW3PEDg4,1374
 beswarm/aient/src/aient/plugins/read_file.py,sha256=-RRmaj-rSl8y--5VKnxCsZ1YQHe75OhnqvsDRLJyujM,8412
+beswarm/aient/src/aient/plugins/read_image.py,sha256=goBnpmnmu753pQBkEROTo1ZaGE23fx5WJVr8T8z4598,2577
 beswarm/aient/src/aient/plugins/registry.py,sha256=YknzhieU_8nQ3oKlUSSWDB4X7t2Jx0JnqT2Jd9Xsvfk,3574
 beswarm/aient/src/aient/plugins/run_python.py,sha256=dgcUwBunMuDkaSKR5bToudVzSdrXVewktDDFUz_iIOQ,4589
-beswarm/aient/src/aient/plugins/websearch.py,sha256=
+beswarm/aient/src/aient/plugins/websearch.py,sha256=llxy1U0vJiNMiKvamMr4p7IruLb3nnDR4YErz8TYimc,15215
 beswarm/aient/src/aient/plugins/write_file.py,sha256=YRvQKMvV-5lwohxlvwt9hjfxz2dRJP85AJWAMUIqbBY,3804
 beswarm/aient/src/aient/prompt/__init__.py,sha256=GBtn6-JDT8KHFCcuPpfSNE_aGddg5p4FEyMCy4BfwGs,20
-beswarm/aient/src/aient/prompt/agent.py,sha256=
+beswarm/aient/src/aient/prompt/agent.py,sha256=y2GETN6ScC5yQVs75VFfzm4YUWzblbqLYz0Sy6JnPRw,24950
 beswarm/aient/src/aient/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 beswarm/aient/src/aient/utils/prompt.py,sha256=UcSzKkFE4-h_1b6NofI6xgk3GoleqALRKY8VBaXLjmI,11311
 beswarm/aient/src/aient/utils/scripts.py,sha256=NXmTxcZqHoRv3S13isLsv7kvqktXnA5ej7uMsxCJUe0,26656
@@ -118,16 +119,17 @@ beswarm/queries/tree-sitter-languages/ruby-tags.scm,sha256=vIidsCeE2A0vdFN18yXKq
 beswarm/queries/tree-sitter-languages/rust-tags.scm,sha256=9ljM1nzhfPs_ZTRw7cr2P9ToOyhGcKkCoN4_HPXSWi4,1451
 beswarm/queries/tree-sitter-languages/scala-tags.scm,sha256=UxQjz80JIrrJ7Pm56uUnQyThfmQNvwk7aQzPNypB-Ao,1761
 beswarm/queries/tree-sitter-languages/typescript-tags.scm,sha256=OMdCeedPiA24ky82DpgTMKXK_l2ySTuF2zrQ2fJAi9E,1253
-beswarm/tools/UIworker.py,sha256=
-beswarm/tools/__init__.py,sha256
+beswarm/tools/UIworker.py,sha256=1sEC76VGFwo48lSx6KOvhJwhgBj7UWAHAAH9BG_lp-M,6439
+beswarm/tools/__init__.py,sha256=jOfYY4EYkwmz-FTJGrI1CyaIYkGWsmGzZBGsoupeX9M,1088
 beswarm/tools/click.py,sha256=TygaekCXTmU3fIu6Uom7ZcyzEgYMlCC_GX-5SmWHuLI,20762
 beswarm/tools/edit_file.py,sha256=hfpLaE4ekDiAya0Le0fJuYa-xUefWHLTxc3F6zGZd7M,6912
 beswarm/tools/planner.py,sha256=lguBCS6kpwNPoXQvqH-WySabVubT82iyWOkJnjt6dXw,1265
 beswarm/tools/repomap.py,sha256=CwvwoN5Swr42EzrORTTeV8MMb7mPviy4a4b0fxBu50k,40828
 beswarm/tools/search_arxiv.py,sha256=9slwBemXjEqrd7-YgVmyMijPXlkhZCybEDRVhWVQ9B0,7937
+beswarm/tools/search_web.py,sha256=B24amOnGHnmdV_6S8bw8O2PdhZRRIDtJjg-wXcfP7dQ,11859
 beswarm/tools/think.py,sha256=WLw-7jNIsnS6n8MMSYUin_f-BGLENFmnKM2LISEp0co,1760
-beswarm/tools/worker.py,sha256=
-beswarm-0.1.
-beswarm-0.1.
-beswarm-0.1.
-beswarm-0.1.
+beswarm/tools/worker.py,sha256=b-FvSEP27-zMYNcqaQeVBoWxaSf2cX_7_1p1GAF6h04,6191
+beswarm-0.1.54.dist-info/METADATA,sha256=XXd8q2pR-8JDzlS1YPEt0CLReDVxvppo6FzZuojJcoI,3537
+beswarm-0.1.54.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+beswarm-0.1.54.dist-info/top_level.txt,sha256=pJw4O87wvt5882smuSO6DfByJz7FJ8SxxT8h9fHCmpo,8
+beswarm-0.1.54.dist-info/RECORD,,
{beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/WHEEL
File without changes
{beswarm-0.1.52.dist-info → beswarm-0.1.54.dist-info}/top_level.txt
File without changes