beswarm 0.1.34__py3-none-any.whl → 0.1.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- beswarm/aient/setup.py +1 -1
- beswarm/aient/src/aient/core/request.py +8 -6
- beswarm/aient/src/aient/core/response.py +31 -22
- beswarm/aient/src/aient/core/test/test_geminimask.py +330 -0
- beswarm/aient/src/aient/core/utils.py +14 -7
- beswarm/aient/src/aient/plugins/excute_command.py +118 -19
- beswarm/tools/UIworker.py +145 -0
- beswarm/tools/__init__.py +8 -0
- beswarm/tools/click.py +456 -0
- {beswarm-0.1.34.dist-info → beswarm-0.1.36.dist-info}/METADATA +20 -1
- {beswarm-0.1.34.dist-info → beswarm-0.1.36.dist-info}/RECORD +13 -10
- {beswarm-0.1.34.dist-info → beswarm-0.1.36.dist-info}/WHEEL +1 -1
- {beswarm-0.1.34.dist-info → beswarm-0.1.36.dist-info}/top_level.txt +0 -0
beswarm/tools/click.py
ADDED
@@ -0,0 +1,456 @@
|
|
1
|
+
import io
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
import json
|
5
|
+
import time
|
6
|
+
import base64
|
7
|
+
import pyautogui # 用于桌面屏幕点击
|
8
|
+
import pyperclip # 新增:用于操作剪贴板
|
9
|
+
import platform # 新增:用于检测操作系统
|
10
|
+
from PIL import Image, ImageDraw
|
11
|
+
from ..aient.src.aient.plugins import register_tool
|
12
|
+
|
13
|
+
from ..aient.src.aient.models import chatgpt
|
14
|
+
from ..aient.src.aient.core.utils import get_image_message, get_text_message
|
15
|
+
|
16
|
+
def display_image_with_bounding_boxes_and_masks_py(
    original_image,
    box_and_mask_data,
    output_overlay_path="overlay_image.png",
    output_compare_dir="comparison_outputs"
):
    """
    Draw bounding boxes and masks on an image and save crop/mask comparison images.

    All failures are reported with ``print`` and never raised to the caller,
    except for a failure to create ``output_compare_dir`` or a malformed
    entry that lacks the "box" / "mask_base64" keys.

    Args:
        original_image: Image object exposing ``convert("RGBA")``; the caller
            in this file passes the PIL image returned by
            ``pyautogui.screenshot()``. It is NOT a file path.
        box_and_mask_data (list): Dicts with a "box" entry
            (``[ymin, xmin, ymax, xmax]`` on a 0-1000 scale) and a
            "mask_base64" entry (base64 text of an encoded mask image, or a
            falsy value when no mask is available).
        output_overlay_path (str): Where the annotated overlay image is saved.
        output_compare_dir (str): Directory receiving the per-box
            ``compare_<n>_crop.png`` / ``compare_<n>_mask.png`` files.

    Returns:
        None.
    """
    # Local import keeps this block self-contained; binascii.Error is the
    # exception base64.b64decode raises for malformed input.
    import binascii

    try:
        img_original = original_image.convert("RGBA")
        img_width, img_height = img_original.size
    except Exception as e:
        print(f"Error processing original image object: {e}")
        return

    # Draw on a copy so the crops below still come from the untouched original.
    img_overlay = img_original.copy()
    draw = ImageDraw.Draw(img_overlay, "RGBA")  # RGBA mode so fills can be translucent

    colors_hex = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF']
    # Hex colours as fully opaque (R, G, B, A) tuples for the outlines.
    colors_rgba = []
    for hex_color in colors_hex:
        h = hex_color.lstrip('#')
        rgb = tuple(int(h[i:i+2], 16) for i in (0, 2, 4))
        colors_rgba.append(rgb + (255,))

    os.makedirs(output_compare_dir, exist_ok=True)

    print(f"Found {len(box_and_mask_data)} box/mask pairs to process.")

    for i, data in enumerate(box_and_mask_data):
        box_0_1000 = data['box']  # [ymin, xmin, ymax, xmax] in 0-1000 range
        mask_b64 = data['mask_base64']
        color_index = i % len(colors_rgba)
        outline_color = colors_rgba[color_index]
        # The mask fill reuses the outline colour at 70% alpha.
        mask_fill_color = outline_color[:3] + (int(255 * 0.7),)

        # --- 1. Convert the 0-1000 box to clamped pixel coordinates ---
        try:
            ymin_norm, xmin_norm, ymax_norm, xmax_norm = [c / 1000.0 for c in box_0_1000]

            left = max(0, int(xmin_norm * img_width))
            top = max(0, int(ymin_norm * img_height))
            right = min(img_width, int(xmax_norm * img_width))
            bottom = min(img_height, int(ymax_norm * img_height))

            box_width_px = right - left
            box_height_px = bottom - top

            if box_width_px <= 0 or box_height_px <= 0:
                print(f"Skipping box {i+1} due to zero or negative dimensions after conversion.")
                continue

        except Exception as e:
            print(f"Error processing coordinates for box {i+1}: {box_0_1000}. Error: {e}")
            continue

        print(f"Processing Box {i+1}: Pixels(L,T,R,B)=({left},{top},{right},{bottom}) Color={colors_hex[color_index]}")

        # --- 2. Outline the box on the overlay ---
        try:
            draw.rectangle([left, top, right, bottom], outline=outline_color, width=5)
        except Exception as e:
            print(f"Error drawing rectangle for box {i+1}: {e}")
            continue

        # --- 3. Decode the mask and paint it over the box ---
        if not mask_b64:
            # Without a mask there is nothing to paint or compare; the outline
            # drawn above is still kept in the final overlay.
            print(f"No mask data for box {i+1}; drew bounding box only.")
            continue

        try:
            mask_bytes = base64.b64decode(mask_b64)
            mask_img_raw = Image.open(io.BytesIO(mask_bytes)).convert("RGBA")

            # Stretch the mask to exactly cover the box.
            mask_img_resized = mask_img_raw.resize((box_width_px, box_height_px), Image.Resampling.NEAREST)

            # Solid colour block pasted through the mask's alpha channel
            # (split()[-1] is the A band of the RGBA mask).
            color_block = Image.new('RGBA', mask_img_resized.size, mask_fill_color)
            img_overlay.paste(color_block, (left, top), mask=mask_img_resized.split()[-1])

        except binascii.Error:
            print(f"Error: Invalid Base64 data for mask {i+1}.")
            continue
        except Exception as e:
            print(f"Error processing or drawing mask for box {i+1}: {e}")
            continue

        # --- 4. Save the crop and the resized mask side by side ---
        try:
            img_crop = img_original.crop((left, top, right, bottom))

            # RGB so the preview is saved without an alpha channel.
            mask_preview = mask_img_resized.convert("RGB")

            crop_filename = os.path.join(output_compare_dir, f"compare_{i+1}_crop.png")
            mask_filename = os.path.join(output_compare_dir, f"compare_{i+1}_mask.png")
            img_crop.save(crop_filename)
            mask_preview.save(mask_filename)
            print(f"  - Saved comparison: {crop_filename}, {mask_filename}")

        except Exception as e:
            print(f"Error creating or saving comparison images for box {i+1}: {e}")

    # --- 5. Save the final overlay ---
    try:
        img_overlay.save(output_overlay_path)
        print(f"\nOverlay image saved to: {output_overlay_path}")
        print(f"Comparison images saved in: {output_compare_dir}")
    except Exception as e:
        print(f"Error saving the final overlay image: {e}")
|
156
|
+
|
157
|
+
def get_json_from_text(text):
    """
    Extract and parse the first JSON object in ``text`` whose first key is "box_2d".

    The match runs lazily from ``{`` to the first following ``}``, so the
    object must be flat: a nested object would cut the match short and the
    fragment would fail to parse.

    Args:
        text (str): Free-form text (e.g. a model reply) that may contain JSON.

    Returns:
        The parsed object (a dict), or None when no candidate is found or the
        candidate is not valid JSON. Both failure cases print a diagnostic.
    """
    # \s* tolerates pretty-printed output where "box_2d" sits on the line
    # after the opening brace.
    regex_pattern = r'(\{\s*"box_2d".+?\})'

    # re.DOTALL (not re.MULTILINE) is the flag that lets "." cross newlines,
    # which is needed for objects spread over several lines.
    match = re.search(regex_pattern, text, re.DOTALL)

    if not match:
        print("在文本中未找到匹配的 JSON 数组。")
        return None

    json_string = match.group(1)
    try:
        return json.loads(json_string)
    except json.JSONDecodeError as e:
        print(f"JSON 解析错误: {e}")
        print(f"出错的字符串是: {json_string}")
        return None
|
191
|
+
|
192
|
+
|
193
|
+
@register_tool()
async def find_and_click_element(target_element, input_text=None):
    """
    在当前屏幕截图中查找目标 UI 元素,并在屏幕上点击其中心点。

    此函数首先截取当前屏幕,然后将截图和目标元素的描述 (`target_element`) 发送给配置好的大语言模型 (LLM)。
    LLM 被要求识别出目标元素,并返回其在截图中的边界框 (bounding box) 和掩码 (mask) 信息(通常以 JSON 格式)。
    函数接着解析 LLM 的响应,提取出边界框坐标。
    (可选)为了调试和验证,函数可以根据 LLM 返回的数据在截图副本上绘制边界框和掩码,并将结果保存为图像文件。
    最后,函数计算边界框的中心点像素坐标,并使用 `pyautogui` 库在该屏幕坐标上模拟鼠标点击。如果提供了 `input_text`,则会在点击后尝试输入该文本。

    Args:
        target_element (str): 需要查找和点击的 UI 元素的文本描述 (例如 "登录按钮", "用户名输入框")。LLM 将使用此描述来定位元素。
        input_text (str, optional): 在点击元素后需要输入的文本。如果为 None 或空字符串,则只执行点击操作。默认为 None。

    Returns:
        str: 如果成功找到元素、计算坐标并执行点击(以及可能的输入),则返回表示成功的字符串消息 (例如 "点击成功!", "点击并输入 '...' 成功!")。
             如果在任何步骤中失败(例如截图失败、LLM 未返回有效坐标、点击失败),则返回 False。
             如果点击成功但输入失败,则返回包含错误信息的字符串。
    """
    # NOTE(review): the docstring above is kept verbatim — register_tool
    # presumably exposes it as the tool description; confirm before editing.

    click_agent_config = {
        "api_key": os.getenv("API_KEY"),
        "api_url": os.getenv("BASE_URL"),
        "engine": "gemini-2.5-pro",
        "system_prompt": "you are a professional UI test engineer, now you need to find the specified screen element.",
        "print_log": True,
        "temperature": 0.7,
        "use_plugins": False,
    }

    # Agent that locates the element in the screenshot.
    click_agent = chatgpt(**click_agent_config)
    prompt = f"Give the segmentation masks for the {target_element}. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in \"box_2d\" and the mask in \"mask\". Only output the one that meets the criteria the most."

    print("正在截取当前屏幕...")
    try:
        screenshot = pyautogui.screenshot()
        # Use the size reported by pyautogui.size() rather than the
        # screenshot's own size: the normalized box is scaled by these values
        # to get the coordinates passed to pyautogui.click().
        img_width, img_height = pyautogui.size()
        print(f"截图成功,尺寸: {img_width}x{img_height}")

        # Encode the screenshot as a base64 PNG for the model request.
        buffered = io.BytesIO()
        screenshot.save(buffered, format="PNG")
        base64_encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
        IMAGE_MIME_TYPE = "image/png"

    except ImportError:
        print("\n❌ 请安装所需库: pip install Pillow pyautogui")
        return False
    except Exception as e:
        print(f"\n❌ 截取屏幕或处理图像时出错: {e}")
        return False

    engine_type = "gpt"
    message_list = []
    text_message = await get_text_message(prompt, engine_type)
    image_message = await get_image_message(f"data:{IMAGE_MIME_TYPE};base64," + base64_encoded_image, engine_type)
    message_list.append(text_message)
    message_list.append(image_message)

    result = await click_agent.ask_async(message_list)
    # Guard against a missing reply as well as a blank one, so .strip() is
    # never called on None.
    if not result or result.strip() == '':
        print("\n❌ click智能体回复为空,请重新生成指令。")
        return False
    first_item = get_json_from_text(result)
    if not first_item or "box_2d" not in first_item:
        print("\n❌ 未能从模型响应中提取有效的 box_2d。")
        return False

    box_0_1000 = first_item.get("box_2d")  # unpacked below as [ymin, xmin, ymax, xmax] on a 0-1000 scale
    mask_data_uri = first_item.get("mask")  # expected to start with "data:image/png;base64,"

    if not box_0_1000 or not isinstance(box_0_1000, list) or len(box_0_1000) != 4:
        print(f"\n❌ 未能从模型响应中提取有效的 box_2d: {box_0_1000}")
        return False
    if not mask_data_uri or not isinstance(mask_data_uri, str) or not mask_data_uri.startswith("data:image/png;base64,"):
        print(f"\n❌ 未能从模型响应中提取有效的 mask data URI: {mask_data_uri}")
        # A missing mask is not fatal: the click only needs the box.
        mask_b64 = None
    else:
        # Keep only the base64 payload after the data-URI prefix.
        mask_b64 = mask_data_uri.split(',')[-1]

    print(f"✅ click智能体回复 (box_2d 范围 0-1000): {box_0_1000}")

    # Debug visualization. box_0_1000 is a non-empty 4-item list at this
    # point, so no further guard is needed; any failure here is reported and
    # must not prevent the click below.
    try:
        print("尝试生成可视化叠加图像...")
        box_and_mask_data_for_display = [{
            "box": box_0_1000,
            "mask_base64": mask_b64
        }]
        display_image_with_bounding_boxes_and_masks_py(
            original_image=screenshot,
            box_and_mask_data=box_and_mask_data_for_display,
            output_overlay_path=f"click_overlay_{time.strftime('%Y%m%d_%H%M%S')}.png",
            output_compare_dir="click_compare"
        )
    except Exception as e:
        print(f"⚠️ 生成可视化图像时出错: {e}")

    try:
        # The click path additionally requires every coordinate to be an int.
        if not (isinstance(box_0_1000, list) and len(box_0_1000) == 4 and all(isinstance(c, int) for c in box_0_1000)):
            print(f"\n❌ 无效的 box_2d 格式或类型: {box_0_1000},期望是包含4个整数的列表。")
            return False

        # 0-1000 scale -> 0.0-1.0 fractions.
        ymin_norm, xmin_norm, ymax_norm, xmax_norm = [c / 1000.0 for c in box_0_1000]

        # Fractions -> pixel coordinates, clamped to the screen bounds.
        left = max(0, int(xmin_norm * img_width))
        top = max(0, int(ymin_norm * img_height))
        right = min(img_width, int(xmax_norm * img_width))
        bottom = min(img_height, int(ymax_norm * img_height))

        if left >= right or top >= bottom:
            print(f"\n❌ 计算出的边界框无效: left={left}, top={top}, right={right}, bottom={bottom}")
            return False

        # Click target is the centre of the box.
        center_x = (left + right) // 2
        center_y = (top + bottom) // 2

        print(f"截图尺寸: width={img_width}, height={img_height}")
        print(f"计算出的像素坐标 (相对于截图): left={left}, top={top}, right={right}, bottom={bottom}")
        print(f"计算出的点击中心点 (屏幕坐标): x={center_x}, y={center_y}")

        print(f"尝试在屏幕坐标 ({center_x}, {center_y}) 点击...")
        # NOTE(review): the click is issued twice, as in the original code —
        # presumably the first click focuses the window; confirm intent.
        pyautogui.click(center_x, center_y)
        pyautogui.click(center_x, center_y)
        print(f"✅ 在 ({center_x}, {center_y}) 点击成功。")

        if input_text:
            try:
                print(f"尝试通过剪贴板输入文本: '{input_text}'")
                # Remember the clipboard so it can be put back afterwards.
                original_clipboard_content = pyperclip.paste()
                pyperclip.copy(input_text)
                try:
                    # The paste shortcut differs between macOS and other systems.
                    if platform.system() == "Darwin":
                        pyautogui.hotkey('command', 'v')
                    else:
                        pyautogui.hotkey('ctrl', 'v')

                    time.sleep(0.1)  # give the target application time to paste
                finally:
                    # Restore the clipboard even when the paste itself failed.
                    pyperclip.copy(original_clipboard_content)

                print(f"✅ 通过剪贴板输入文本成功。")
                return f"点击并输入 '{input_text}' 成功!"
            except ImportError:
                print("\n❌ pyperclip 库未安装。请运行 'pip install pyperclip' 以支持通过剪贴板输入中文。")
                print(f"将尝试使用 pyautogui.typewrite (可能无法正确输入中文): '{input_text}'")
                try:
                    pyautogui.typewrite(input_text, interval=0.1)
                    print(f"✅ (备用 typewrite) 已尝试输入文本。")
                    return f"点击并尝试输入 '{input_text}' (使用 typewrite,中文可能失败)!"
                except Exception as e_typewrite:
                    print(f"\n❌ 使用 pyautogui.typewrite 输入文本时也发生错误: {e_typewrite}")
                    return f"点击成功,但输入文本 '{input_text}' (typewrite) 失败: {e_typewrite}"
            except Exception as e:
                print(f"\n❌ 通过剪贴板输入文本时发生错误: {e}")
                # The click itself succeeded, so report a partial success.
                return f"点击成功,但输入文本 '{input_text}' (剪贴板) 失败: {e}"
        else:
            return "点击成功!"

    except ImportError:
        print("\n❌ 请安装所需库: pip install Pillow pyautogui")
        return False
    except Exception as e:
        print(f"\n❌ 处理点击时发生意外错误: {e}")
        return False
|
398
|
+
|
399
|
+
|
400
|
+
@register_tool()
async def scroll_screen(direction: str = "down"):
    """
    控制屏幕向上或向下滑动固定的距离。

    Args:
        direction (str, optional): 滚动的方向。可以是 "up" 或 "down"。
                                   默认为 "down"。

    Returns:
        str: 如果成功执行滚动,则返回相应的成功消息。
             如果方向无效或发生错误,则返回错误信息。
    """
    # NOTE(review): the docstring above is kept verbatim — register_tool
    # presumably exposes it as the tool description; confirm before editing.
    scroll_offset = 20

    # Reject anything other than the two supported directions up front.
    if direction not in ("down", "up"):
        error_msg = f"错误:无效的滚动方向 '{direction}'。请使用 'up' 或 'down'。"
        print(f"\n❌ {error_msg}")
        return error_msg

    if direction == "down":
        # pyautogui.scroll takes a negative amount to scroll down.
        actual_scroll_amount = -scroll_offset
        print(f"尝试向下滚动屏幕,固定偏移量: {scroll_offset}...")
    else:
        actual_scroll_amount = scroll_offset
        print(f"尝试向上滚动屏幕,固定偏移量: {scroll_offset}...")

    try:
        pyautogui.scroll(actual_scroll_amount)
    except ImportError:
        print("\n❌ pyautogui 库未安装。请运行 'pip install pyautogui'。")
        return "错误:pyautogui 库未安装。"
    except Exception as e:
        error_msg = f"错误:屏幕滚动时发生: {e}"
        print(f"\n❌ {error_msg}")
        return error_msg
    else:
        success_msg = f"✅ 屏幕向 {direction} 滑动 {scroll_offset} 成功。"
        print(success_msg)
        return success_msg
|
439
|
+
|
440
|
+
|
441
|
+
if __name__ == "__main__":
    # Manual smoke test; run with: python -m beswarm.tools.click
    import asyncio

    # Short pause before the first action so the target window can be focused.
    time.sleep(2)
    # asyncio.run(find_and_click_element("Write a message...", "你好"))
    # asyncio.run(find_and_click_element("搜索框"))

    # Exercise the scroll tool in both directions.
    asyncio.run(scroll_screen("down"))
    time.sleep(2)  # pause so the effect of the first scroll can be observed
    asyncio.run(scroll_screen("up"))
    # asyncio.run(scroll_screen("sideways"))  # invalid-direction case
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: beswarm
|
3
|
-
Version: 0.1.34
|
3
|
+
Version: 0.1.36
|
4
4
|
Summary: MAS
|
5
5
|
Requires-Python: >=3.11
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -19,8 +19,11 @@ Requires-Dist: networkx>=3.4.2
|
|
19
19
|
Requires-Dist: numpy>=2.2.4
|
20
20
|
Requires-Dist: pdfminer-six==20240706
|
21
21
|
Requires-Dist: pillow>=11.2.1
|
22
|
+
Requires-Dist: pip>=25.1.1
|
23
|
+
Requires-Dist: pyautogui>=0.9.54
|
22
24
|
Requires-Dist: pyexecjs>=1.5.1
|
23
25
|
Requires-Dist: pygments>=2.19.1
|
26
|
+
Requires-Dist: pyperclip>=1.9.0
|
24
27
|
Requires-Dist: pytz>=2025.2
|
25
28
|
Requires-Dist: requests>=2.32.3
|
26
29
|
Requires-Dist: scipy>=1.15.2
|
@@ -67,6 +70,22 @@ cd ~/Downloads/GitHub/beswarm && docker run --rm \
|
|
67
70
|
--goal "分析这个仓库 https://github.com/cloneofsimo/minRF"
|
68
71
|
```
|
69
72
|
|
73
|
+
测试 docker 是否可以用 GPU:
|
74
|
+
|
75
|
+
```
|
76
|
+
docker run --gpus all -it --rm --entrypoint nvidia-smi yym68686/beswarm
|
77
|
+
```
|
78
|
+
|
79
|
+
服务器安装
|
80
|
+
|
81
|
+
```
|
82
|
+
pip install pipx
|
83
|
+
pipx ensurepath
|
84
|
+
source ~/.bashrc
|
85
|
+
pipx install nvitop
|
86
|
+
pip install beswarm -i https://pypi.tuna.tsinghua.edu.cn/simple
|
87
|
+
```
|
88
|
+
|
70
89
|
main.py
|
71
90
|
|
72
91
|
```python
|
@@ -1,15 +1,16 @@
|
|
1
1
|
beswarm/__init__.py,sha256=HZjUOJtZR5QhMuDbq-wukQQn1VrBusNWai_ysGo-VVI,20
|
2
2
|
beswarm/utils.py,sha256=AdDCcqAIIKQEMl7PfryVgeT9G5sHe7QNsZnrvmTGA8E,283
|
3
3
|
beswarm/aient/main.py,sha256=SiYAIgQlLJqYusnTVEJOx1WNkSJKMImhgn5aWjfroxg,3814
|
4
|
-
beswarm/aient/setup.py,sha256=
|
4
|
+
beswarm/aient/setup.py,sha256=UvQkQk0MwjVxgzgR4-QJxBy2A4et3GTfTnlbiHw6LVQ,487
|
5
5
|
beswarm/aient/src/aient/__init__.py,sha256=SRfF7oDVlOOAi6nGKiJIUK6B_arqYLO9iSMp-2IZZps,21
|
6
6
|
beswarm/aient/src/aient/core/__init__.py,sha256=NxjebTlku35S4Dzr16rdSqSTWUvvwEeACe8KvHJnjPg,34
|
7
7
|
beswarm/aient/src/aient/core/log_config.py,sha256=kz2_yJv1p-o3lUQOwA3qh-LSc3wMHv13iCQclw44W9c,274
|
8
8
|
beswarm/aient/src/aient/core/models.py,sha256=_1wYZg_n9kb2A3C8xCboyqleH2iHc9scwOvtx9DPeok,7582
|
9
|
-
beswarm/aient/src/aient/core/request.py,sha256=
|
10
|
-
beswarm/aient/src/aient/core/response.py,sha256=
|
11
|
-
beswarm/aient/src/aient/core/utils.py,sha256=
|
9
|
+
beswarm/aient/src/aient/core/request.py,sha256=RChzDuH49gaJE-o5g65h3nCh-OsuHPwLkq8yuyYEcbo,61431
|
10
|
+
beswarm/aient/src/aient/core/response.py,sha256=8bS1nAoP6QOMDeDvJvZDVAt34kZ1DpWBI3PUGyza0ZU,31447
|
11
|
+
beswarm/aient/src/aient/core/utils.py,sha256=CAFqWzICaKVysH9GLHBcp-VeOShisLjWGhEsh6-beWo,26365
|
12
12
|
beswarm/aient/src/aient/core/test/test_base_api.py,sha256=pWnycRJbuPSXKKU9AQjWrMAX1wiLC_014Qc9hh5C2Pw,524
|
13
|
+
beswarm/aient/src/aient/core/test/test_geminimask.py,sha256=HFX8jDbNg_FjjgPNxfYaR-0-roUrOO-ND-FVsuxSoiw,13254
|
13
14
|
beswarm/aient/src/aient/core/test/test_image.py,sha256=_T4peNGdXKBHHxyQNx12u-NTyFE8TlYI6NvvagsG2LE,319
|
14
15
|
beswarm/aient/src/aient/core/test/test_payload.py,sha256=8jBiJY1uidm1jzL-EiK0s6UGmW9XkdsuuKFGrwFhFkw,2755
|
15
16
|
beswarm/aient/src/aient/models/__init__.py,sha256=ouNDNvoBBpIFrLsk09Q_sq23HR0GbLAKfGLIFmfEuXE,219
|
@@ -24,7 +25,7 @@ beswarm/aient/src/aient/models/vertex.py,sha256=qVD5l1Q538xXUPulxG4nmDjXE1VoV4yu
|
|
24
25
|
beswarm/aient/src/aient/plugins/__init__.py,sha256=p3KO6Aa3Lupos4i2SjzLQw1hzQTigOAfEHngsldrsyk,986
|
25
26
|
beswarm/aient/src/aient/plugins/arXiv.py,sha256=yHjb6PS3GUWazpOYRMKMzghKJlxnZ5TX8z9F6UtUVow,1461
|
26
27
|
beswarm/aient/src/aient/plugins/config.py,sha256=KnZ5xtb5o41FI2_qvxTEQhssdd3WJc7lIAFNR85INQw,7817
|
27
|
-
beswarm/aient/src/aient/plugins/excute_command.py,sha256=
|
28
|
+
beswarm/aient/src/aient/plugins/excute_command.py,sha256=u-JOZ21dDcDx1j3O0KVIHAsa6MNuOxHFBdV3iCnTih0,5413
|
28
29
|
beswarm/aient/src/aient/plugins/get_time.py,sha256=Ih5XIW5SDAIhrZ9W4Qe5Hs1k4ieKPUc_LAd6ySNyqZk,654
|
29
30
|
beswarm/aient/src/aient/plugins/image.py,sha256=ZElCIaZznE06TN9xW3DrSukS7U3A5_cjk1Jge4NzPxw,2072
|
30
31
|
beswarm/aient/src/aient/plugins/list_directory.py,sha256=5ubm-mfrj-tanGSDp4M_Tmb6vQb3dx2-XVfQ2yL2G8A,1394
|
@@ -118,14 +119,16 @@ beswarm/queries/tree-sitter-languages/ruby-tags.scm,sha256=vIidsCeE2A0vdFN18yXKq
|
|
118
119
|
beswarm/queries/tree-sitter-languages/rust-tags.scm,sha256=9ljM1nzhfPs_ZTRw7cr2P9ToOyhGcKkCoN4_HPXSWi4,1451
|
119
120
|
beswarm/queries/tree-sitter-languages/scala-tags.scm,sha256=UxQjz80JIrrJ7Pm56uUnQyThfmQNvwk7aQzPNypB-Ao,1761
|
120
121
|
beswarm/queries/tree-sitter-languages/typescript-tags.scm,sha256=OMdCeedPiA24ky82DpgTMKXK_l2ySTuF2zrQ2fJAi9E,1253
|
121
|
-
beswarm/tools/
|
122
|
+
beswarm/tools/UIworker.py,sha256=9e0iWBIi-ET6OvhRftuMSphixihySlTjvB4pCZvKuAU,6331
|
123
|
+
beswarm/tools/__init__.py,sha256=-h_zoMEjnLCg9iVgSoor9BI3yK64LdVOQkVB0DgGFmo,1001
|
124
|
+
beswarm/tools/click.py,sha256=vjCN6_00xJw-dcMZEv9sneTBmuKI7lFN4xApZZizd5k,20760
|
122
125
|
beswarm/tools/edit_file.py,sha256=qKOVanfh_79eavRrqqyFirRo07sZ897jUL2PD4GRKJk,6965
|
123
126
|
beswarm/tools/planner.py,sha256=lguBCS6kpwNPoXQvqH-WySabVubT82iyWOkJnjt6dXw,1265
|
124
127
|
beswarm/tools/repomap.py,sha256=CwvwoN5Swr42EzrORTTeV8MMb7mPviy4a4b0fxBu50k,40828
|
125
128
|
beswarm/tools/search_arxiv.py,sha256=9slwBemXjEqrd7-YgVmyMijPXlkhZCybEDRVhWVQ9B0,7937
|
126
129
|
beswarm/tools/think.py,sha256=WLw-7jNIsnS6n8MMSYUin_f-BGLENFmnKM2LISEp0co,1760
|
127
130
|
beswarm/tools/worker.py,sha256=FfKCx7KFNbMRoAXtjU1_nJQjx9WHny7KBq8OXSYICJs,5334
|
128
|
-
beswarm-0.1.
|
129
|
-
beswarm-0.1.
|
130
|
-
beswarm-0.1.
|
131
|
-
beswarm-0.1.
|
131
|
+
beswarm-0.1.36.dist-info/METADATA,sha256=SrywCezGn-Q5A64VxtiYcJKwWtzPQip0Rdz38b6Qrxk,3197
|
132
|
+
beswarm-0.1.36.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
133
|
+
beswarm-0.1.36.dist-info/top_level.txt,sha256=pJw4O87wvt5882smuSO6DfByJz7FJ8SxxT8h9fHCmpo,8
|
134
|
+
beswarm-0.1.36.dist-info/RECORD,,
|
File without changes
|