mobile-mcp-ai 2.2.6__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. mobile_mcp/config.py +3 -2
  2. mobile_mcp/core/basic_tools_lite.py +3193 -0
  3. mobile_mcp/core/ios_client_wda.py +569 -0
  4. mobile_mcp/core/ios_device_manager_wda.py +306 -0
  5. mobile_mcp/core/mobile_client.py +246 -20
  6. mobile_mcp/core/template_matcher.py +429 -0
  7. mobile_mcp/core/templates/close_buttons/auto_x_0112_151217.png +0 -0
  8. mobile_mcp/core/templates/close_buttons/auto_x_0112_152037.png +0 -0
  9. mobile_mcp/core/templates/close_buttons/auto_x_0112_152840.png +0 -0
  10. mobile_mcp/core/templates/close_buttons/auto_x_0112_153256.png +0 -0
  11. mobile_mcp/core/templates/close_buttons/auto_x_0112_154847.png +0 -0
  12. mobile_mcp/core/templates/close_buttons/gray_x_stock_ad.png +0 -0
  13. mobile_mcp/mcp_tools/__init__.py +10 -0
  14. mobile_mcp/mcp_tools/mcp_server.py +992 -0
  15. mobile_mcp_ai-2.5.3.dist-info/METADATA +456 -0
  16. mobile_mcp_ai-2.5.3.dist-info/RECORD +32 -0
  17. mobile_mcp_ai-2.5.3.dist-info/entry_points.txt +2 -0
  18. mobile_mcp/core/ai/__init__.py +0 -11
  19. mobile_mcp/core/ai/ai_analyzer.py +0 -197
  20. mobile_mcp/core/ai/ai_config.py +0 -116
  21. mobile_mcp/core/ai/ai_platform_adapter.py +0 -399
  22. mobile_mcp/core/ai/smart_test_executor.py +0 -520
  23. mobile_mcp/core/ai/test_generator.py +0 -365
  24. mobile_mcp/core/ai/test_generator_from_history.py +0 -391
  25. mobile_mcp/core/ai/test_generator_standalone.py +0 -293
  26. mobile_mcp/core/assertion/__init__.py +0 -9
  27. mobile_mcp/core/assertion/smart_assertion.py +0 -341
  28. mobile_mcp/core/basic_tools.py +0 -945
  29. mobile_mcp/core/h5/__init__.py +0 -10
  30. mobile_mcp/core/h5/h5_handler.py +0 -548
  31. mobile_mcp/core/ios_client.py +0 -219
  32. mobile_mcp/core/ios_device_manager.py +0 -252
  33. mobile_mcp/core/locator/__init__.py +0 -10
  34. mobile_mcp/core/locator/cursor_ai_auto_analyzer.py +0 -119
  35. mobile_mcp/core/locator/cursor_vision_helper.py +0 -414
  36. mobile_mcp/core/locator/mobile_smart_locator.py +0 -1747
  37. mobile_mcp/core/locator/position_analyzer.py +0 -813
  38. mobile_mcp/core/locator/script_updater.py +0 -157
  39. mobile_mcp/core/nl_test_runner.py +0 -585
  40. mobile_mcp/core/smart_app_launcher.py +0 -421
  41. mobile_mcp/core/smart_tools.py +0 -311
  42. mobile_mcp/mcp/__init__.py +0 -13
  43. mobile_mcp/mcp/mcp_server.py +0 -1126
  44. mobile_mcp/mcp/mcp_server_simple.py +0 -23
  45. mobile_mcp/vision/__init__.py +0 -10
  46. mobile_mcp/vision/vision_locator.py +0 -405
  47. mobile_mcp_ai-2.2.6.dist-info/METADATA +0 -503
  48. mobile_mcp_ai-2.2.6.dist-info/RECORD +0 -49
  49. mobile_mcp_ai-2.2.6.dist-info/entry_points.txt +0 -2
  50. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/WHEEL +0 -0
  51. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/licenses/LICENSE +0 -0
  52. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,3193 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ 精简版基础工具 - 纯 MCP,依赖 Cursor 视觉能力
5
+
6
+ 特点:
7
+ - 不需要 AI 密钥
8
+ - 核心功能精简
9
+ - 保留 pytest 脚本生成
10
+ - 支持操作历史记录
11
+ """
12
+
13
+ import asyncio
14
+ import time
15
+ import re
16
+ from pathlib import Path
17
+ from typing import Dict, List, Optional
18
+ from datetime import datetime
19
+
20
+
21
+ class BasicMobileToolsLite:
22
+ """精简版移动端工具"""
23
+
24
def __init__(self, mobile_client):
    """Bind the device client and prepare screenshot storage.

    Args:
        mobile_client: Client object the tool methods use to reach the
            device.
    """
    self.client = mobile_client

    # Screenshots live in a "screenshots" folder next to this module's
    # parent directory; create it on first use.
    shots_dir = Path(__file__).parent.parent / "screenshots"
    self.screenshot_dir = shots_dir
    shots_dir.mkdir(parents=True, exist_ok=True)

    # Recorded operations (used later to generate pytest scripts).
    self.operation_history: List[Dict] = []
34
+
35
def _is_ios(self) -> bool:
    """Tell whether the bound client reports the ``'ios'`` platform.

    A client without a ``platform`` attribute is treated as Android.
    """
    platform_name = getattr(self.client, 'platform', 'android')
    return platform_name == 'ios'
38
+
39
def _get_ios_client(self):
    """Return the iOS client object held by ``self.client``.

    ``_ios_client`` is preferred, then ``wda``; the first attribute that
    exists and is truthy wins.  Returns ``None`` when neither qualifies.
    """
    for attr_name in ('_ios_client', 'wda'):
        candidate = getattr(self.client, attr_name, None)
        if candidate:
            return candidate
    return None
46
+
47
def _record_operation(self, action: str, **kwargs):
    """Append one operation entry to ``self.operation_history``.

    The entry holds ``action``, an ISO-format ``timestamp`` taken from
    ``datetime.now()``, and every extra keyword argument (a keyword named
    ``timestamp`` replaces the generated value).
    """
    entry = dict(action=action, timestamp=datetime.now().isoformat())
    entry.update(kwargs)
    self.operation_history.append(entry)
55
+
56
+
57
+
58
+ # ==================== 截图 ====================
59
+
60
def take_screenshot(self, description: str = "", compress: bool = True,
                    max_width: int = 720, quality: int = 75,
                    crop_x: int = 0, crop_y: int = 0, crop_size: int = 0) -> Dict:
    """Capture the screen, optionally compressing it or cropping a region.

    One of three outputs is produced, checked in this order:

    1. Cropped region (``crop_x``, ``crop_y`` and ``crop_size`` all > 0):
       a square centred on ``(crop_x, crop_y)``, clamped to the image
       edges, saved as PNG without downscaling.  The result carries
       ``crop_offset_x`` / ``crop_offset_y`` (top-left corner of the crop)
       so coordinates read from the crop can be mapped back.
    2. Compressed full screen (``compress`` is true): the image is scaled
       down to at most ``max_width`` pixels wide, keeping the aspect
       ratio, and saved as JPEG with the given ``quality``.
    3. Uncompressed full screen: the captured PNG is kept unchanged.

    When Pillow cannot be imported the call is delegated to
    ``_take_screenshot_no_compress``.

    Args:
        description: Optional label embedded (sanitised) in the file name.
        compress: Whether to downscale and JPEG-encode full screenshots.
        max_width: Maximum width of the compressed image.
        quality: JPEG quality passed to Pillow.
        crop_x: X of the crop centre; 0 disables cropping.
        crop_y: Y of the crop centre; 0 disables cropping.
        crop_size: Edge length of the crop square; 0 disables cropping.
            NOTE(review): the crop box is applied to image pixels; confirm
            callers pass coordinates in the screenshot's pixel space.

    Returns:
        A dict with ``success`` and ``message``; on success it also holds
        ``screenshot_path``, the reported screen size and the saved image
        size, plus mode-specific fields (crop offsets or size statistics).
    """
    temp_path = None
    try:
        from PIL import Image

        timestamp = time.strftime("%Y%m%d_%H%M%S")
        platform = "ios" if self._is_ios() else "android"

        def build_filename(prefix: str, extension: str) -> str:
            # The sanitised description is inserted only when a description
            # was supplied, exactly like the three original name templates.
            parts = [prefix]
            if description:
                parts.append(re.sub(r'[^\w\s-]', '', description).strip().replace(' ', '_'))
            parts.append(timestamp)
            return "_".join(parts) + extension

        # Step 1: capture to a temporary PNG and read the screen size.
        temp_path = self.screenshot_dir / f"temp_{timestamp}.png"

        screen_width, screen_height = 0, 0
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.screenshot(str(temp_path))
            size = ios_client.wda.window_size()
            screen_width, screen_height = size[0], size[1]
        else:
            self.client.u2.screenshot(str(temp_path))
            info = self.client.u2.info
            screen_width = info.get('displayWidth', 0)
            screen_height = info.get('displayHeight', 0)

        original_size = temp_path.stat().st_size

        # Step 2: load the pixels into memory and release the file handle
        # right away, so the temp file can be renamed or deleted on every
        # platform (an open handle blocks both on Windows).
        with Image.open(temp_path) as opened:
            img = opened.copy()

        # ---------- Mode 1: cropped region, kept sharp as PNG ----------
        if crop_x > 0 and crop_y > 0 and crop_size > 0:
            half_size = crop_size // 2
            left = max(0, crop_x - half_size)
            top = max(0, crop_y - half_size)
            right = min(img.width, crop_x + half_size)
            bottom = min(img.height, crop_y + half_size)

            # The top-left corner is what callers add back to crop-relative
            # coordinates.
            crop_offset_x, crop_offset_y = left, top
            img = img.crop((left, top, right, bottom))

            final_path = self.screenshot_dir / build_filename(f"screenshot_{platform}_crop", ".png")
            img.save(str(final_path), "PNG")
            cropped_size = final_path.stat().st_size

            return {
                "success": True,
                "screenshot_path": str(final_path),
                "screen_width": screen_width,
                "screen_height": screen_height,
                "image_width": img.width,
                "image_height": img.height,
                "crop_offset_x": crop_offset_x,
                "crop_offset_y": crop_offset_y,
                "file_size": f"{cropped_size/1024:.1f}KB",
                "message": f"🔍 局部截图已保存: {final_path}\n"
                           f"📐 裁剪区域: ({crop_offset_x}, {crop_offset_y}) 起,{img.width}x{img.height} 像素\n"
                           f"📦 文件大小: {cropped_size/1024:.0f}KB\n"
                           f"🎯 【坐标换算】AI 返回坐标 (x, y) 后:\n"
                           f" 实际屏幕坐标 = ({crop_offset_x} + x, {crop_offset_y} + y)\n"
                           f" 或直接调用 mobile_click_at_coords(x, y, crop_offset_x={crop_offset_x}, crop_offset_y={crop_offset_y})"
            }

        # ---------- Mode 2: compressed full-screen JPEG ----------
        if compress:
            # Remember the captured size: it is what click coordinates must
            # be scaled back to, and it may differ from the reported
            # display size.
            original_img_width = img.width
            original_img_height = img.height
            image_width, image_height = img.width, img.height

            if img.width > max_width:
                ratio = max_width / img.width
                new_w = max_width
                new_h = int(img.height * ratio)
                # Pillow moved the filter constants over time:
                # Image.Resampling.LANCZOS -> Image.LANCZOS -> Image.ANTIALIAS.
                resampling = getattr(Image, "Resampling", Image)
                resample = getattr(resampling, "LANCZOS", None)
                if resample is None:
                    resample = Image.ANTIALIAS
                img = img.resize((new_w, new_h), resample)
                image_width, image_height = new_w, new_h

            final_path = self.screenshot_dir / build_filename(f"screenshot_{platform}", ".jpg")

            # JPEG has no alpha channel: flatten transparency onto white.
            if img.mode in ('RGBA', 'LA', 'P'):
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    img = img.convert('RGBA')
                background.paste(img, mask=img.split()[-1] if img.mode == 'RGBA' else None)
                img = background
            elif img.mode != 'RGB':
                img = img.convert("RGB")

            img.save(str(final_path), "JPEG", quality=quality)

            compressed_size = final_path.stat().st_size
            saved_percent = (1 - compressed_size / original_size) * 100

            return {
                "success": True,
                "screenshot_path": str(final_path),
                "screen_width": screen_width,
                "screen_height": screen_height,
                "original_img_width": original_img_width,    # size as captured
                "original_img_height": original_img_height,
                "image_width": image_width,                  # size after scaling
                "image_height": image_height,
                "original_size": f"{original_size/1024:.1f}KB",
                "compressed_size": f"{compressed_size/1024:.1f}KB",
                "saved_percent": f"{saved_percent:.0f}%",
                "message": f"📸 截图已保存: {final_path}\n"
                           f"📐 原始尺寸: {original_img_width}x{original_img_height} → 压缩后: {image_width}x{image_height}\n"
                           f"📦 已压缩: {original_size/1024:.0f}KB → {compressed_size/1024:.0f}KB (省 {saved_percent:.0f}%)\n"
                           f"⚠️ 【坐标转换】AI 返回坐标后,请传入:\n"
                           f" image_width={image_width}, image_height={image_height},\n"
                           f" original_img_width={original_img_width}, original_img_height={original_img_height}"
            }

        # ---------- Mode 3: uncompressed full-screen PNG ----------
        final_path = self.screenshot_dir / build_filename(f"screenshot_{platform}", ".png")
        temp_path.rename(final_path)

        # Report the captured image size, which may differ from the
        # reported display size.
        return {
            "success": True,
            "screenshot_path": str(final_path),
            "screen_width": screen_width,
            "screen_height": screen_height,
            "original_img_width": img.width,
            "original_img_height": img.height,
            "image_width": img.width,
            "image_height": img.height,
            "file_size": f"{original_size/1024:.1f}KB",
            "message": f"📸 截图已保存: {final_path}\n"
                       f"📐 截图尺寸: {img.width}x{img.height}\n"
                       f"📦 文件大小: {original_size/1024:.0f}KB(未压缩)\n"
                       f"💡 未压缩,坐标可直接使用"
        }
    except ImportError:
        # Pillow is unavailable: fall back to the plain capture path.
        return self._take_screenshot_no_compress(description)
    except Exception as e:
        return {"success": False, "message": f"❌ 截图失败: {e}"}
    finally:
        # Best-effort removal of the temp capture on every exit path.  It
        # is already gone after the rename in mode 3 or when the capture
        # itself failed, hence the tolerated OSError.
        if temp_path is not None:
            try:
                temp_path.unlink()
            except OSError:
                pass
281
+
282
def take_screenshot_with_grid(self, grid_size: int = 100, show_popup_hints: bool = True) -> Dict:
    """Capture the screen and overlay a labelled coordinate grid.

    Grid lines are drawn every ``grid_size`` pixels, with the X value
    written at the top of each vertical line and the Y value at the left
    of each horizontal line.  On Android, when ``show_popup_hints`` is
    true, the UI hierarchy is scanned for the largest container that looks
    like a popup; its outline and four candidate close-button positions
    are drawn and returned.

    Args:
        grid_size: Grid spacing in pixels; must be greater than 0.
        show_popup_hints: Whether to look for a popup and mark candidate
            close-button positions (skipped on iOS).

    Returns:
        A dict with ``success`` and ``message``; on success also
        ``screenshot_path``, screen and image sizes, ``grid_size`` and
        ``popup_detected`` (plus ``popup_bounds`` and
        ``close_button_hints`` when a popup was found).
    """
    temp_path = None
    try:
        # Reject unusable spacing before touching the device: a step of 0
        # makes range() raise and a negative step silently draws no grid.
        if grid_size <= 0:
            return {
                "success": False,
                "message": f"❌ 网格截图失败: grid_size 必须大于 0(当前值: {grid_size})"
            }

        from PIL import Image, ImageDraw, ImageFont

        timestamp = time.strftime("%Y%m%d_%H%M%S")
        is_ios = self._is_ios()
        platform = "ios" if is_ios else "android"

        # Step 1: capture to a temporary PNG.
        temp_path = self.screenshot_dir / f"temp_grid_{timestamp}.png"

        screen_width, screen_height = 0, 0
        if is_ios:
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.screenshot(str(temp_path))
            size = ios_client.wda.window_size()
            screen_width, screen_height = size[0], size[1]
        else:
            self.client.u2.screenshot(str(temp_path))
            info = self.client.u2.info
            screen_width = info.get('displayWidth', 720)
            screen_height = info.get('displayHeight', 1280)

        # Load into memory and close the file so it can be deleted later.
        with Image.open(temp_path) as opened:
            img = opened.copy()
        draw = ImageDraw.Draw(img, 'RGBA')

        # Prefer the macOS system font; use Pillow's built-in font elsewhere.
        try:
            font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 14)
            font_small = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 11)
        except (OSError, ImportError):
            font = ImageFont.load_default()
            font_small = font

        img_width, img_height = img.size

        # Step 2: grid lines and coordinate labels.
        grid_color = (255, 0, 0, 80)    # translucent red
        text_color = (255, 0, 0, 200)   # red labels

        for x in range(0, img_width, grid_size):
            draw.line([(x, 0), (x, img_height)], fill=grid_color, width=1)
            draw.text((x + 2, 2), str(x), fill=text_color, font=font_small)

        for y in range(0, img_height, grid_size):
            draw.line([(0, y), (img_width, y)], fill=grid_color, width=1)
            draw.text((2, y + 2), str(y), fill=text_color, font=font_small)

        # Step 3: popup detection and close-button hints (Android only).
        popup_info = None
        close_positions = []

        if show_popup_hints and not is_ios:
            try:
                import xml.etree.ElementTree as ET
                root = ET.fromstring(self.client.u2.dump_hierarchy())

                bounds_pattern = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')
                container_keywords = ('Layout', 'View', 'Dialog', 'Card')
                screen_area = screen_width * screen_height

                # Keep the largest container that is neither full screen
                # nor tiny and does not start at the very top.
                popup_bounds = None
                popup_area = 0
                for elem in root.iter():
                    bounds_str = elem.attrib.get('bounds', '')
                    if not bounds_str:
                        continue
                    match = bounds_pattern.match(bounds_str)
                    if not match:
                        continue

                    x1, y1, x2, y2 = map(int, match.groups())
                    width = x2 - x1
                    height = y2 - y1
                    area = width * height
                    class_name = elem.attrib.get('class', '')

                    is_container = any(kw in class_name for kw in container_keywords)
                    area_ratio = area / screen_area if screen_area > 0 else 0
                    is_not_fullscreen = (width < screen_width * 0.98 or height < screen_height * 0.98)
                    is_reasonable_size = 0.08 < area_ratio < 0.85

                    if is_container and is_not_fullscreen and is_reasonable_size and y1 > 50:
                        if popup_bounds is None or area > popup_area:
                            popup_bounds = (x1, y1, x2, y2)
                            popup_area = area

                if popup_bounds:
                    px1, py1, px2, py2 = popup_bounds
                    popup_width = px2 - px1
                    popup_height = py2 - py1

                    # Offsets scale with the popup so the guesses adapt to
                    # different resolutions.
                    offset_x = max(25, int(popup_width * 0.05))       # 5% of width, at least 25px
                    offset_y = max(25, int(popup_height * 0.04))      # 4% of height, at least 25px
                    outer_offset = max(15, int(popup_width * 0.025))  # just outside the corner

                    close_positions = [
                        {"name": "右上角内", "x": px2 - offset_x, "y": py1 + offset_y, "priority": 1},
                        {"name": "右上角外", "x": px2 + outer_offset, "y": py1 - outer_offset, "priority": 2},
                        {"name": "正上方", "x": (px1 + px2) // 2, "y": py1 - offset_y, "priority": 3},
                        {"name": "底部下方", "x": (px1 + px2) // 2, "y": py2 + offset_y, "priority": 4},
                    ]
                    popup_info = {
                        "bounds": f"[{px1},{py1}][{px2},{py2}]",
                        "width": popup_width,
                        "height": popup_height,
                        "close_positions": close_positions
                    }

                    # Popup outline (blue).
                    draw.rectangle([px1, py1, px2, py2], outline=(0, 100, 255, 200), width=3)
                    try:
                        draw.text((px1 + 5, py1 + 5), "弹窗区域", fill=(0, 100, 255), font=font)
                    except Exception:
                        # The fallback font may be unable to render this
                        # label; the outline and hints are still useful.
                        pass

                    # Candidate close positions: green circle, ordinal and
                    # coordinates, drawn only when inside the image.
                    for i, pos in enumerate(close_positions):
                        cx, cy = pos["x"], pos["y"]
                        if 0 <= cx <= img_width and 0 <= cy <= img_height:
                            draw.ellipse([cx-15, cy-15, cx+15, cy+15],
                                         outline=(0, 255, 0, 200), width=2)
                            draw.text((cx-5, cy-8), str(i+1), fill=(0, 255, 0), font=font)
                            draw.text((cx+18, cy-8), f"({cx},{cy})", fill=(0, 255, 0), font=font_small)

            except Exception:
                # Popup hints are best-effort and must never fail the
                # screenshot itself.
                pass

        # Step 4: flatten to RGB (JPEG has no alpha) and save.
        final_path = self.screenshot_dir / f"screenshot_{platform}_grid_{timestamp}.jpg"

        if img.mode in ('RGBA', 'LA', 'P'):
            background = Image.new('RGB', img.size, (255, 255, 255))
            if img.mode == 'P':
                img = img.convert('RGBA')
            background.paste(img, mask=img.split()[-1] if img.mode == 'RGBA' else None)
            img = background
        elif img.mode != 'RGB':
            img = img.convert("RGB")

        img.save(str(final_path), "JPEG", quality=85)

        result = {
            "success": True,
            "screenshot_path": str(final_path),
            "screen_width": screen_width,
            "screen_height": screen_height,
            "image_width": img_width,
            "image_height": img_height,
            "grid_size": grid_size,
            "message": f"📸 网格截图已保存: {final_path}\n"
                       f"📐 尺寸: {img_width}x{img_height}\n"
                       f"📏 网格间距: {grid_size}px"
        }

        if popup_info:
            result["popup_detected"] = True
            result["popup_bounds"] = popup_info["bounds"]
            result["close_button_hints"] = close_positions
            result["message"] += f"\n🎯 检测到弹窗: {popup_info['bounds']}"
            result["message"] += "\n💡 可能的关闭按钮位置(绿色圆圈标注):"
            for pos in close_positions:
                result["message"] += f"\n {pos['priority']}. {pos['name']}: ({pos['x']}, {pos['y']})"
        else:
            result["popup_detected"] = False

        return result

    except ImportError:
        return {"success": False, "message": "❌ 需要安装 Pillow: pip install Pillow"}
    except Exception as e:
        return {"success": False, "message": f"❌ 网格截图失败: {e}"}
    finally:
        # Best-effort removal of the temp capture on every exit path.
        if temp_path is not None:
            try:
                temp_path.unlink()
            except OSError:
                pass
478
+
479
def take_screenshot_with_som(self) -> Dict:
    """Capture a Set-of-Mark screenshot with numbered clickable elements.

    On Android the UI hierarchy is parsed; every clickable element of a
    sensible size gets a coloured outline and a numeric tag on the
    screenshot.  The numbered list is stored in ``self._som_elements`` so
    that ``click_by_som(index)`` can click an element by its number.  The
    largest popup-like container, if any, is outlined in blue.  On iOS no
    elements are marked.

    Returns:
        A dict with ``success`` and ``message``; on success also
        ``screenshot_path``, screen and image sizes, ``element_count``,
        ``elements`` (index, center, bounds, desc), ``popup_detected`` and
        ``popup_bounds``.
    """
    temp_path = None
    try:
        from PIL import Image, ImageDraw, ImageFont

        timestamp = time.strftime("%Y%m%d_%H%M%S")
        is_ios = self._is_ios()
        platform = "ios" if is_ios else "android"

        # Step 1: capture to a temporary PNG.
        temp_path = self.screenshot_dir / f"temp_som_{timestamp}.png"

        screen_width, screen_height = 0, 0
        if is_ios:
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.screenshot(str(temp_path))
            size = ios_client.wda.window_size()
            screen_width, screen_height = size[0], size[1]
        else:
            self.client.u2.screenshot(str(temp_path))
            info = self.client.u2.info
            screen_width = info.get('displayWidth', 720)
            screen_height = info.get('displayHeight', 1280)

        # Load into memory and close the file so it can be deleted later.
        with Image.open(temp_path) as opened:
            img = opened.copy()
        draw = ImageDraw.Draw(img, 'RGBA')
        img_width, img_height = img.size

        # Prefer the macOS system font; use Pillow's built-in font elsewhere.
        try:
            font_small = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 12)
        except (OSError, ImportError):
            font_small = ImageFont.load_default()

        bounds_pattern = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')

        # Step 2: collect clickable elements (Android only).
        root = None
        elements = []
        if not is_ios:
            try:
                import xml.etree.ElementTree as ET
                root = ET.fromstring(self.client.u2.dump_hierarchy())

                for elem in root.iter():
                    clickable = elem.attrib.get('clickable', 'false') == 'true'
                    bounds_str = elem.attrib.get('bounds', '')
                    if not clickable or not bounds_str:
                        continue

                    match = bounds_pattern.match(bounds_str)
                    if not match:
                        continue

                    x1, y1, x2, y2 = map(int, match.groups())
                    width = x2 - x1
                    height = y2 - y1

                    # Skip elements that are too small to hit or that are
                    # full-width, large-area containers.
                    if width < 20 or height < 20:
                        continue
                    if width >= screen_width * 0.98 and height >= screen_height * 0.5:
                        continue

                    text = elem.attrib.get('text', '')
                    content_desc = elem.attrib.get('content-desc', '')
                    resource_id = elem.attrib.get('resource-id', '')
                    class_name = elem.attrib.get('class', '')

                    # Label priority: text, content-desc, short resource id,
                    # short class name.  The fallback is computed on its
                    # own because ``a or b or c if rid else d`` parses as
                    # ``(a or b or c) if rid else d`` and would discard
                    # text/content-desc for elements without a resource id.
                    if resource_id:
                        fallback = resource_id.split('/')[-1]
                    else:
                        fallback = class_name.split('.')[-1]
                    desc = text or content_desc or fallback
                    if len(desc) > 20:
                        desc = desc[:17] + "..."

                    elements.append({
                        'bounds': (x1, y1, x2, y2),
                        'center': ((x1 + x2) // 2, (y1 + y2) // 2),
                        'text': text,
                        'desc': desc,
                        'resource_id': resource_id
                    })
            except Exception:
                # Element discovery is best-effort: keep whatever was
                # collected and still return the screenshot.
                pass

        # Step 3: draw the marks, cycling through a small palette.
        colors = [
            (255, 0, 0),      # red
            (0, 255, 0),      # green
            (0, 100, 255),    # blue
            (255, 165, 0),    # orange
            (255, 0, 255),    # magenta
            (0, 255, 255),    # cyan
        ]

        som_elements = []  # numbered list consumed by click_by_som
        for i, elem in enumerate(elements):
            x1, y1, x2, y2 = elem['bounds']
            cx, cy = elem['center']
            color = colors[i % len(colors)]

            draw.rectangle([x1, y1, x2, y2], outline=color + (200,), width=2)

            # Number tag just above the top-left corner, clamped to the
            # top edge of the image.
            label = str(i + 1)
            label_w, label_h = 20, 18
            label_x = x1
            label_y = max(0, y1 - label_h - 2)
            draw.rectangle([label_x, label_y, label_x + label_w, label_y + label_h],
                           fill=color + (220,))
            draw.text((label_x + 4, label_y + 1), label, fill=(255, 255, 255), font=font_small)

            som_elements.append({
                'index': i + 1,
                'center': (cx, cy),
                'bounds': f"[{x1},{y1}][{x2},{y2}]",
                'desc': elem['desc']
            })

        # Step 3.5: outline the largest popup-like container, if any.
        popup_bounds = None
        if root is not None:
            try:
                container_keywords = ('Layout', 'View', 'Dialog', 'Card', 'Frame')
                screen_area = screen_width * screen_height
                popup_area = 0

                for elem in root.iter():
                    bounds_str = elem.attrib.get('bounds', '')
                    if not bounds_str:
                        continue
                    match = bounds_pattern.match(bounds_str)
                    if not match:
                        continue

                    px1, py1, px2, py2 = map(int, match.groups())
                    p_width = px2 - px1
                    p_height = py2 - py1
                    p_area = p_width * p_height
                    class_name = elem.attrib.get('class', '')

                    is_container = any(kw in class_name for kw in container_keywords)
                    area_ratio = p_area / screen_area if screen_area > 0 else 0
                    is_not_fullscreen = (p_width < screen_width * 0.99 or p_height < screen_height * 0.95)
                    # Accepted area range: 5% - 95% of the screen.
                    is_reasonable_size = 0.05 < area_ratio < 0.95

                    if is_container and is_not_fullscreen and is_reasonable_size and py1 > 30:
                        if popup_bounds is None or p_area > popup_area:
                            popup_bounds = (px1, py1, px2, py2)
                            popup_area = p_area

                if popup_bounds:
                    px1, py1, px2, py2 = popup_bounds
                    # Only the outline is drawn; close-button positions are
                    # not guessed here.
                    draw.rectangle([px1, py1, px2, py2], outline=(0, 150, 255, 180), width=3)
                    try:
                        draw.text((px1+5, py1-25), "弹窗区域", fill=(0, 150, 255), font=font_small)
                    except Exception:
                        # The fallback font may be unable to render this
                        # label; the outline alone is enough.
                        pass
            except Exception:
                # Popup detection is best-effort and must not fail the call.
                pass

        # Remembered for click_by_som.
        self._som_elements = som_elements

        # Step 4: flatten to RGB (JPEG has no alpha) and save.
        final_path = self.screenshot_dir / f"screenshot_{platform}_som_{timestamp}.jpg"

        if img.mode in ('RGBA', 'LA', 'P'):
            background = Image.new('RGB', img.size, (255, 255, 255))
            if img.mode == 'P':
                img = img.convert('RGBA')
            background.paste(img, mask=img.split()[-1] if img.mode == 'RGBA' else None)
            img = background
        elif img.mode != 'RGB':
            img = img.convert("RGB")

        img.save(str(final_path), "JPEG", quality=85)

        # Human-readable element list (first 15 entries only).
        elements_text = "\n".join([
            f" [{e['index']}] {e['desc']} → ({e['center'][0]}, {e['center'][1]})"
            for e in som_elements[:15]
        ])
        if len(som_elements) > 15:
            elements_text += f"\n ... 还有 {len(som_elements) - 15} 个元素"

        hints_text = ""
        if popup_bounds:
            hints_text = "\n🎯 检测到弹窗区域(蓝色边框)\n"
            hints_text += " 如需关闭弹窗,请观察图片中的 X 按钮位置\n"
            hints_text += " 然后使用 mobile_click_by_percent(x%, y%) 点击"

        return {
            "success": True,
            "screenshot_path": str(final_path),
            "screen_width": screen_width,
            "screen_height": screen_height,
            "image_width": img_width,
            "image_height": img_height,
            "element_count": len(som_elements),
            "elements": som_elements,
            "popup_detected": popup_bounds is not None,
            "popup_bounds": f"[{popup_bounds[0]},{popup_bounds[1]}][{popup_bounds[2]},{popup_bounds[3]}]" if popup_bounds else None,
            "message": f"📸 SoM 截图已保存: {final_path}\n"
                       f"🏷️ 已标注 {len(som_elements)} 个可点击元素\n"
                       f"📋 元素列表:\n{elements_text}{hints_text}\n\n"
                       f"💡 使用方法:\n"
                       f" - 点击标注元素:mobile_click_by_som(编号)\n"
                       f" - 点击任意位置:mobile_click_by_percent(x%, y%)"
        }

    except ImportError:
        return {"success": False, "message": "❌ 需要安装 Pillow: pip install Pillow"}
    except Exception as e:
        return {"success": False, "message": f"❌ SoM 截图失败: {e}"}
    finally:
        # Best-effort removal of the temp capture on every exit path.
        if temp_path is not None:
            try:
                temp_path.unlink()
            except OSError:
                pass
725
+
726
def click_by_som(self, index: int) -> Dict:
    """Click the element carrying the given Set-of-Mark number.

    Uses the element list stored in ``self._som_elements`` by
    ``take_screenshot_with_som`` and clicks the centre of the matching
    element.

    Args:
        index: Element number as shown on the SoM screenshot (starts at 1).

    Returns:
        A dict with ``success`` and ``message``; on success also
        ``clicked`` (index, desc, coords, bounds).  A failure is returned
        when no element list is stored, the number is unknown, the iOS
        client is not initialised, or the click itself raises.
    """
    try:
        som_elements = getattr(self, '_som_elements', None)
        if not som_elements:
            return {
                "success": False,
                "message": "❌ 请先调用 mobile_screenshot_with_som 获取元素列表"
            }

        # Find the element with the requested number.
        target = next((elem for elem in som_elements if elem['index'] == index), None)
        if not target:
            return {
                "success": False,
                "message": f"❌ 未找到编号 {index} 的元素,有效范围: 1-{len(som_elements)}"
            }

        cx, cy = target['center']
        if self._is_ios():
            ios_client = self._get_ios_client()
            # Without a usable client nothing can be clicked; say so
            # instead of reporting a click that never happened.
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.click(cx, cy)
        else:
            self.client.u2.click(cx, cy)

        # Short pause after the tap before returning.
        time.sleep(0.3)

        return {
            "success": True,
            "message": f"✅ 已点击 [{index}] {target['desc']} → ({cx}, {cy})\n💡 建议:再次截图确认操作是否成功",
            "clicked": {
                "index": index,
                "desc": target['desc'],
                "coords": (cx, cy),
                "bounds": target['bounds']
            }
        }

    except Exception as e:
        return {"success": False, "message": f"❌ 点击失败: {e}\n💡 如果页面已变化,请重新调用 mobile_screenshot_with_som 刷新元素列表"}
782
+
783
def _take_screenshot_no_compress(self, description: str = "") -> Dict:
    """Capture the screen as a plain PNG (fallback when Pillow is missing).

    Args:
        description: Optional label embedded (sanitised) in the file name.

    Returns:
        A dict with ``success`` and ``message``; on success also
        ``screenshot_path`` and the reported screen size, which is reported
        as the image size too.
    """
    try:
        stamp = time.strftime("%Y%m%d_%H%M%S")
        on_ios = self._is_ios()
        platform = "ios" if on_ios else "android"

        name_parts = [f"screenshot_{platform}"]
        if description:
            name_parts.append(re.sub(r'[^\w\s-]', '', description).strip().replace(' ', '_'))
        name_parts.append(stamp)
        screenshot_path = self.screenshot_dir / ("_".join(name_parts) + ".png")

        if on_ios:
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.screenshot(str(screenshot_path))
            window = ios_client.wda.window_size()
            width, height = window[0], window[1]
        else:
            device = self.client.u2
            device.screenshot(str(screenshot_path))
            display = device.info
            width = display.get('displayWidth', 0)
            height = display.get('displayHeight', 0)

        # The image cannot be inspected without Pillow, so the reported
        # screen size doubles as the image size.
        summary = (f"📸 截图已保存: {screenshot_path}\n"
                   f"📐 屏幕尺寸: {width}x{height}\n"
                   f"⚠️ 未压缩(PIL 未安装),建议安装: pip install Pillow")
        return {
            "success": True,
            "screenshot_path": str(screenshot_path),
            "screen_width": width,
            "screen_height": height,
            "image_width": width,
            "image_height": height,
            "message": summary
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 截图失败: {e}"}
826
+
827
def get_screen_size(self) -> Dict:
    """Report the screen size of the connected device.

    Returns:
        ``{"success": True, "width": w, "height": h, "size": "WxH"}`` on
        success.  A failure dict with ``message`` is returned when the iOS
        client is not initialised or the device query raises.
    """
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            # Always return a dict: without this guard the function fell
            # through and returned None when no iOS client was available.
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            size = ios_client.wda.window_size()
            width, height = size[0], size[1]
        else:
            info = self.client.u2.info
            width = info.get('displayWidth', 0)
            height = info.get('displayHeight', 0)

        return {
            "success": True,
            "width": width,
            "height": height,
            "size": f"{width}x{height}"
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 获取屏幕尺寸失败: {e}"}
852
+
853
+ # ==================== 点击操作 ====================
854
+
855
def click_at_coords(self, x: int, y: int, image_width: int = 0, image_height: int = 0,
                    crop_offset_x: int = 0, crop_offset_y: int = 0,
                    original_img_width: int = 0, original_img_height: int = 0) -> Dict:
    """Tap a point, converting screenshot coordinates to device coordinates first.

    Args:
        x: X coordinate (taken from a screenshot or already a screen coordinate).
        y: Y coordinate (taken from a screenshot or already a screen coordinate).
        image_width: Width of the (compressed) image the coordinates refer to.
        image_height: Height of the (compressed) image the coordinates refer to.
        crop_offset_x: X offset of a cropped screenshot; added to ``x``.
        crop_offset_y: Y offset of a cropped screenshot; added to ``y``.
        original_img_width: Width of the screenshot before compression.
        original_img_height: Height of the screenshot before compression.

    Conversion rules (mutually exclusive, checked in this order):
        1. Any positive crop offset: the offsets are added to the coordinates.
        2. Positive image size: coordinates are scaled from the image size to
           the original image size, or to the screen size when the original
           size was not supplied.

    Returns:
        A dict with ``success`` and a human-readable ``message``.
    """
    try:
        # Resolve the screen size for the active platform.
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            window = ios_client.wda.window_size()
            screen_width, screen_height = window[0], window[1]
        else:
            device_info = self.client.u2.info
            screen_width = device_info.get('displayWidth', 0)
            screen_height = device_info.get('displayHeight', 0)

        # Remember the caller's coordinates for the report message.
        original_x, original_y = x, y
        conversion_type = ""

        if crop_offset_x > 0 or crop_offset_y > 0:
            # Cropped screenshot: shift back into full-screen space.
            x, y = x + crop_offset_x, y + crop_offset_y
            conversion_type = "crop_offset"
        elif image_width > 0 and image_height > 0:
            # Compressed full screenshot: scale up to the original size.
            target_width = original_img_width if original_img_width > 0 else screen_width
            target_height = original_img_height if original_img_height > 0 else screen_height
            have_target = target_width > 0 and target_height > 0
            if have_target and (image_width != target_width or image_height != target_height):
                x = int(x * target_width / image_width)
                y = int(y * target_height / image_height)
                conversion_type = "scale"

        # Perform the tap.
        if self._is_ios():
            self._get_ios_client().wda.click(x, y)
        else:
            self.client.u2.click(x, y)

        time.sleep(0.3)

        # Relative position, recorded alongside the pixel position.
        x_percent = round(x / screen_width * 100, 1) if screen_width > 0 else 0
        y_percent = round(y / screen_height * 100, 1) if screen_height > 0 else 0

        self._record_operation(
            'click',
            x=x,
            y=y,
            x_percent=x_percent,
            y_percent=y_percent,
            screen_width=screen_width,
            screen_height=screen_height,
            ref=f"coords_{x}_{y}",
        )

        if conversion_type == "crop_offset":
            report = [
                f"✅ 点击成功: ({x}, {y})",
                f" 🔍 局部截图坐标转换: ({original_x},{original_y}) + 偏移({crop_offset_x},{crop_offset_y}) → ({x},{y})",
            ]
        elif conversion_type:
            report = [
                f"✅ 点击成功: ({x}, {y})",
                f" 📐 坐标已转换: ({original_x},{original_y}) → ({x},{y})",
                f" 🖼️ 图片尺寸: {image_width}x{image_height} → 屏幕: {screen_width}x{screen_height}",
            ]
        else:
            report = [f"✅ 点击成功: ({x}, {y}) [相对位置: {x_percent}%, {y_percent}%]"]
        return {"success": True, "message": "\n".join(report)}
    except Exception as e:
        return {"success": False, "message": f"❌ 点击失败: {e}"}
960
+
961
def click_by_percent(self, x_percent: float, y_percent: float) -> Dict:
    """Tap a position given as a percentage of the screen size.

    The top-left corner is (0%, 0%), the centre is (50%, 50%) and the
    bottom-right corner is (100%, 100%). The pixel position is
    ``screen_size * percent / 100``, clamped into the valid pixel range
    ``[0, size - 1]`` so that 100% lands on the last pixel instead of one
    past the edge.

    Args:
        x_percent: Horizontal position, 0 = left edge, 100 = right edge.
        y_percent: Vertical position, 0 = top edge, 100 = bottom edge.

    Returns:
        A dict with ``success`` and ``message``; on success also
        ``screen_size``, ``percent`` and ``pixel``.

    Example:
        click_by_percent(50, 50)   # centre of the screen
    """
    try:
        # Step 1: screen size for the active platform.
        ios_client = None
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            size = ios_client.wda.window_size()
            width, height = size[0], size[1]
        else:
            info = self.client.u2.info
            width = info.get('displayWidth', 0)
            height = info.get('displayHeight', 0)

        if width <= 0 or height <= 0:
            return {"success": False, "message": "❌ 无法获取屏幕尺寸"}

        # Step 2: percentage -> pixel, kept inside the screen. Without the
        # clamp, 100% would address pixel `width`/`height`, which is off-screen.
        x = min(max(int(width * x_percent / 100), 0), width - 1)
        y = min(max(int(height * y_percent / 100), 0), height - 1)

        # Step 3: perform the tap.
        if ios_client is not None:
            ios_client.wda.click(x, y)
        else:
            self.client.u2.click(x, y)

        time.sleep(0.3)

        # Step 4: record both the percentage and the resolved pixel.
        self._record_operation(
            'click',
            x=x,
            y=y,
            x_percent=x_percent,
            y_percent=y_percent,
            screen_width=width,
            screen_height=height,
            ref=f"percent_{x_percent}_{y_percent}",
        )

        return {
            "success": True,
            "message": f"✅ 百分比点击成功: ({x_percent}%, {y_percent}%) → 像素({x}, {y})",
            "screen_size": {"width": width, "height": height},
            "percent": {"x": x_percent, "y": y_percent},
            "pixel": {"x": x, "y": y},
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 百分比点击失败: {e}"}
1033
+
1034
def click_by_text(self, text: str, timeout: float = 3.0) -> Dict:
    """Tap the element identified by ``text``.

    On iOS the element is looked up by ``name`` and then by ``label``.
    On Android the hierarchy is searched first via
    ``self._find_element_in_tree``; the reported attribute type selects the
    matching uiautomator2 selector, and the element bounds are used as a
    coordinate fallback when the selector does not resolve.

    Args:
        text: Text (or content description) to look for.
        timeout: Accepted for interface compatibility; not read by this method.

    Returns:
        A dict with ``success`` and ``message``.
    """
    not_found = {"success": False, "message": f"❌ 文本不存在: {text}"}
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if ios_client and hasattr(ios_client, 'wda'):
                target = ios_client.wda(name=text)
                if not target.exists:
                    target = ios_client.wda(label=text)
                if target.exists:
                    target.click()
                    time.sleep(0.3)
                    self._record_operation('click', element=text, ref=text)
                    return {"success": True, "message": f"✅ 点击成功: '{text}'"}
            return not_found

        # Android: consult the dumped hierarchy before building a selector.
        hit = self._find_element_in_tree(text)
        if not hit:
            return not_found

        attr_type = hit['attr_type']
        attr_value = hit['attr_value']
        bounds = hit.get('bounds')

        # Only these attribute types map onto a selector keyword.
        if attr_type in ('text', 'textContains', 'description', 'descriptionContains'):
            target = self.client.u2(**{attr_type: attr_value})
        else:
            target = None

        if target and target.exists(timeout=1):
            target.click()
            time.sleep(0.3)
            self._record_operation('click', element=text, ref=f"{attr_type}:{attr_value}")
            return {"success": True, "message": f"✅ 点击成功({attr_type}): '{text}'"}

        # Selector did not resolve: tap the centre of the reported bounds.
        if bounds:
            x = (bounds[0] + bounds[2]) // 2
            y = (bounds[1] + bounds[3]) // 2
            self.client.u2.click(x, y)
            time.sleep(0.3)
            self._record_operation('click', element=text, x=x, y=y, ref=f"coords:{x},{y}")
            return {"success": True, "message": f"✅ 点击成功(坐标兜底): '{text}' @ ({x},{y})"}

        return not_found
    except Exception as e:
        return {"success": False, "message": f"❌ 点击失败: {e}"}
1088
+
1089
def _find_element_in_tree(self, text: str) -> Optional[Dict]:
    """Find the best element for ``text`` in the dumped UI hierarchy.

    The whole tree is scanned and the best-ranked match wins, so an exact
    match anywhere in the tree is preferred over a fuzzy match that merely
    appears earlier in document order. Ranking, best first:

        1. ``text`` attribute equals ``text``            -> ``'text'``
        2. ``content-desc`` attribute equals ``text``    -> ``'description'``
        3. ``text`` attribute contains ``text``          -> ``'textContains'``
        4. ``content-desc`` attribute contains ``text``  -> ``'descriptionContains'``

    Args:
        text: The text to look for. An empty string never matches; it would
            otherwise match every node, starting with the root.

    Returns:
        ``{'attr_type', 'attr_value', 'bounds'}`` for the best match, where
        ``bounds`` is ``[x1, y1, x2, y2]`` or ``None`` when the node has no
        parsable bounds; ``None`` when nothing matches or the hierarchy
        cannot be read or parsed.
    """
    import re
    import xml.etree.ElementTree as ET

    if not text:
        return None

    kinds = ('text', 'description', 'textContains', 'descriptionContains')
    try:
        root = ET.fromstring(self.client.u2.dump_hierarchy())

        best_rank = len(kinds)
        best_bounds = ''
        for node in root.iter():
            node_text = node.attrib.get('text', '')
            node_desc = node.attrib.get('content-desc', '')

            if node_text == text:
                rank = 0
            elif node_desc == text:
                rank = 1
            elif text in node_text:
                rank = 2
            elif text in node_desc:
                rank = 3
            else:
                continue

            # Strict comparison keeps the first node within each rank.
            if rank < best_rank:
                best_rank = rank
                best_bounds = node.attrib.get('bounds', '')
                if rank == 0:
                    break  # nothing can beat an exact text match

        if best_rank == len(kinds):
            return None

        # Bounds look like "[x1,y1][x2,y2]"; keep the sign of off-screen values.
        numbers = re.findall(r'-?\d+', best_bounds)
        bounds = [int(n) for n in numbers] if len(numbers) == 4 else None

        return {'attr_type': kinds[best_rank], 'attr_value': text, 'bounds': bounds}
    except Exception:
        # Best-effort lookup: callers treat None as "not found".
        return None
1128
+
1129
def click_by_id(self, resource_id: str) -> Dict:
    """Tap the element identified by ``resource_id``.

    On iOS the element is looked up by ``id`` and then by ``name``; on
    Android by the ``resourceId`` selector with a 0.5 s existence wait.

    Args:
        resource_id: Resource id (Android) or accessibility id/name (iOS).

    Returns:
        A dict with ``success`` and ``message``.
    """
    tapped = {"success": True, "message": f"✅ 点击成功: {resource_id}"}
    absent = {"success": False, "message": f"❌ 元素不存在: {resource_id}"}
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if ios_client and hasattr(ios_client, 'wda'):
                target = ios_client.wda(id=resource_id)
                if not target.exists:
                    target = ios_client.wda(name=resource_id)
                if target.exists:
                    target.click()
                    time.sleep(0.3)
                    self._record_operation('click', element=resource_id, ref=resource_id)
                    return tapped
            return absent

        target = self.client.u2(resourceId=resource_id)
        if not target.exists(timeout=0.5):
            return absent
        target.click()
        time.sleep(0.3)
        self._record_operation('click', element=resource_id, ref=resource_id)
        return tapped
    except Exception as e:
        return {"success": False, "message": f"❌ 点击失败: {e}"}
1154
+
1155
+ # ==================== 长按操作 ====================
1156
+
1157
def long_press_at_coords(self, x: int, y: int, duration: float = 1.0,
                         image_width: int = 0, image_height: int = 0,
                         crop_offset_x: int = 0, crop_offset_y: int = 0,
                         original_img_width: int = 0, original_img_height: int = 0) -> Dict:
    """Long-press a point, converting screenshot coordinates to device coordinates first.

    Args:
        x: X coordinate (taken from a screenshot or already a screen coordinate).
        y: Y coordinate (taken from a screenshot or already a screen coordinate).
        duration: Press duration in seconds.
        image_width: Width of the (compressed) image the coordinates refer to.
        image_height: Height of the (compressed) image the coordinates refer to.
        crop_offset_x: X offset of a cropped screenshot; added to ``x``.
        crop_offset_y: Y offset of a cropped screenshot; added to ``y``.
        original_img_width: Width of the screenshot before compression.
        original_img_height: Height of the screenshot before compression.

    Conversion rules (mutually exclusive, checked in this order):
        1. Any positive crop offset: the offsets are added to the coordinates.
        2. Positive image size: coordinates are scaled from the image size to
           the original image size, or to the screen size when the original
           size was not supplied.

    Returns:
        A dict with ``success`` and a human-readable ``message``.
    """
    try:
        # Resolve the screen size for the active platform.
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            window = ios_client.wda.window_size()
            screen_width, screen_height = window[0], window[1]
        else:
            device_info = self.client.u2.info
            screen_width = device_info.get('displayWidth', 0)
            screen_height = device_info.get('displayHeight', 0)

        original_x, original_y = x, y
        conversion_type = ""

        if crop_offset_x > 0 or crop_offset_y > 0:
            # Cropped screenshot: shift back into full-screen space.
            x, y = x + crop_offset_x, y + crop_offset_y
            conversion_type = "crop_offset"
        elif image_width > 0 and image_height > 0:
            # Compressed full screenshot: scale up to the original size.
            target_width = original_img_width if original_img_width > 0 else screen_width
            target_height = original_img_height if original_img_height > 0 else screen_height
            have_target = target_width > 0 and target_height > 0
            if have_target and (image_width != target_width or image_height != target_height):
                x = int(x * target_width / image_width)
                y = int(y * target_height / image_height)
                conversion_type = "scale"

        # Perform the long press.
        if self._is_ios():
            wda = self._get_ios_client().wda
            if hasattr(wda, 'tap_hold'):
                wda.tap_hold(x, y, duration=duration)
            else:
                # Fallback: a swipe that starts and ends on the same point.
                wda.swipe(x, y, x, y, duration=duration)
        else:
            self.client.u2.long_click(x, y, duration=duration)

        time.sleep(0.3)

        x_percent = round(x / screen_width * 100, 1) if screen_width > 0 else 0
        y_percent = round(y / screen_height * 100, 1) if screen_height > 0 else 0

        self._record_operation(
            'long_press',
            x=x,
            y=y,
            x_percent=x_percent,
            y_percent=y_percent,
            duration=duration,
            screen_width=screen_width,
            screen_height=screen_height,
            ref=f"coords_{x}_{y}",
        )

        if conversion_type == "crop_offset":
            report = [
                f"✅ 长按成功: ({x}, {y}) 持续 {duration}s",
                f" 🔍 局部截图坐标转换: ({original_x},{original_y}) + 偏移({crop_offset_x},{crop_offset_y}) → ({x},{y})",
            ]
        elif conversion_type:
            report = [
                f"✅ 长按成功: ({x}, {y}) 持续 {duration}s",
                f" 📐 坐标已转换: ({original_x},{original_y}) → ({x},{y})",
                f" 🖼️ 图片尺寸: {image_width}x{image_height} → 屏幕: {screen_width}x{screen_height}",
            ]
        else:
            report = [f"✅ 长按成功: ({x}, {y}) 持续 {duration}s [相对位置: {x_percent}%, {y_percent}%]"]
        return {"success": True, "message": "\n".join(report)}
    except Exception as e:
        return {"success": False, "message": f"❌ 长按失败: {e}"}
1268
+
1269
def long_press_by_percent(self, x_percent: float, y_percent: float, duration: float = 1.0) -> Dict:
    """Long-press a position given as a percentage of the screen size.

    The top-left corner is (0%, 0%), the centre is (50%, 50%) and the
    bottom-right corner is (100%, 100%). The pixel position is
    ``screen_size * percent / 100``, clamped into the valid pixel range
    ``[0, size - 1]`` so that 100% lands on the last pixel instead of one
    past the edge.

    Args:
        x_percent: Horizontal position, 0 = left edge, 100 = right edge.
        y_percent: Vertical position, 0 = top edge, 100 = bottom edge.
        duration: Press duration in seconds.

    Returns:
        A dict with ``success`` and ``message``; on success also
        ``screen_size``, ``percent``, ``pixel`` and ``duration``.
    """
    try:
        # Step 1: screen size for the active platform.
        ios_client = None
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            size = ios_client.wda.window_size()
            width, height = size[0], size[1]
        else:
            info = self.client.u2.info
            width = info.get('displayWidth', 0)
            height = info.get('displayHeight', 0)

        if width <= 0 or height <= 0:
            return {"success": False, "message": "❌ 无法获取屏幕尺寸"}

        # Step 2: percentage -> pixel, kept inside the screen. Without the
        # clamp, 100% would address pixel `width`/`height`, which is off-screen.
        x = min(max(int(width * x_percent / 100), 0), width - 1)
        y = min(max(int(height * y_percent / 100), 0), height - 1)

        # Step 3: perform the long press.
        if ios_client is not None:
            if hasattr(ios_client.wda, 'tap_hold'):
                ios_client.wda.tap_hold(x, y, duration=duration)
            else:
                # Fallback: a swipe that starts and ends on the same point.
                ios_client.wda.swipe(x, y, x, y, duration=duration)
        else:
            self.client.u2.long_click(x, y, duration=duration)

        time.sleep(0.3)

        # Step 4: record both the percentage and the resolved pixel.
        self._record_operation(
            'long_press',
            x=x,
            y=y,
            x_percent=x_percent,
            y_percent=y_percent,
            duration=duration,
            screen_width=width,
            screen_height=height,
            ref=f"percent_{x_percent}_{y_percent}",
        )

        return {
            "success": True,
            "message": f"✅ 百分比长按成功: ({x_percent}%, {y_percent}%) → 像素({x}, {y}) 持续 {duration}s",
            "screen_size": {"width": width, "height": height},
            "percent": {"x": x_percent, "y": y_percent},
            "pixel": {"x": x, "y": y},
            "duration": duration,
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 百分比长按失败: {e}"}
1342
+
1343
def long_press_by_text(self, text: str, duration: float = 1.0) -> Dict:
    """Long-press the element identified by ``text``.

    On iOS the element is looked up by ``name`` and then by ``label`` and
    the press is performed at the centre of its bounds. On Android the
    hierarchy is searched first via ``self._find_element_in_tree``; the
    reported attribute type selects the matching uiautomator2 selector, and
    the element bounds are used as a coordinate fallback when the selector
    does not resolve.

    Args:
        text: Text (or content description) to look for.
        duration: Press duration in seconds.

    Returns:
        A dict with ``success`` and ``message``.
    """
    not_found = {"success": False, "message": f"❌ 文本不存在: {text}"}
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if ios_client and hasattr(ios_client, 'wda'):
                target = ios_client.wda(name=text)
                if not target.exists:
                    target = ios_client.wda(label=text)
                if target.exists:
                    # Press at the centre of the element's rectangle.
                    rect = target.bounds
                    x = int((rect.x + rect.x + rect.width) / 2)
                    y = int((rect.y + rect.y + rect.height) / 2)
                    if hasattr(ios_client.wda, 'tap_hold'):
                        ios_client.wda.tap_hold(x, y, duration=duration)
                    else:
                        ios_client.wda.swipe(x, y, x, y, duration=duration)
                    time.sleep(0.3)
                    self._record_operation('long_press', element=text, duration=duration, ref=text)
                    return {"success": True, "message": f"✅ 长按成功: '{text}' 持续 {duration}s"}
            return not_found

        # Android: consult the dumped hierarchy before building a selector.
        hit = self._find_element_in_tree(text)
        if not hit:
            return not_found

        attr_type = hit['attr_type']
        attr_value = hit['attr_value']
        bounds = hit.get('bounds')

        # Only these attribute types map onto a selector keyword.
        if attr_type in ('text', 'textContains', 'description', 'descriptionContains'):
            target = self.client.u2(**{attr_type: attr_value})
        else:
            target = None

        if target and target.exists(timeout=1):
            target.long_click(duration=duration)
            time.sleep(0.3)
            self._record_operation('long_press', element=text, duration=duration, ref=f"{attr_type}:{attr_value}")
            return {"success": True, "message": f"✅ 长按成功({attr_type}): '{text}' 持续 {duration}s"}

        # Selector did not resolve: press the centre of the reported bounds.
        if bounds:
            x = (bounds[0] + bounds[2]) // 2
            y = (bounds[1] + bounds[3]) // 2
            self.client.u2.long_click(x, y, duration=duration)
            time.sleep(0.3)
            self._record_operation('long_press', element=text, x=x, y=y, duration=duration, ref=f"coords:{x},{y}")
            return {"success": True, "message": f"✅ 长按成功(坐标兜底): '{text}' @ ({x},{y}) 持续 {duration}s"}

        return not_found
    except Exception as e:
        return {"success": False, "message": f"❌ 长按失败: {e}"}
1409
+
1410
def long_press_by_id(self, resource_id: str, duration: float = 1.0) -> Dict:
    """Long-press the element identified by ``resource_id``.

    On iOS the element is looked up by ``id`` and then by ``name`` and the
    press is performed at the centre of its bounds; on Android the
    ``resourceId`` selector is used with a 0.5 s existence wait.

    Args:
        resource_id: Resource id (Android) or accessibility id/name (iOS).
        duration: Press duration in seconds.

    Returns:
        A dict with ``success`` and ``message``.
    """
    pressed = {"success": True, "message": f"✅ 长按成功: {resource_id} 持续 {duration}s"}
    absent = {"success": False, "message": f"❌ 元素不存在: {resource_id}"}
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if ios_client and hasattr(ios_client, 'wda'):
                target = ios_client.wda(id=resource_id)
                if not target.exists:
                    target = ios_client.wda(name=resource_id)
                if target.exists:
                    # Press at the centre of the element's rectangle.
                    rect = target.bounds
                    x = int((rect.x + rect.x + rect.width) / 2)
                    y = int((rect.y + rect.y + rect.height) / 2)
                    if hasattr(ios_client.wda, 'tap_hold'):
                        ios_client.wda.tap_hold(x, y, duration=duration)
                    else:
                        ios_client.wda.swipe(x, y, x, y, duration=duration)
                    time.sleep(0.3)
                    self._record_operation('long_press', element=resource_id, duration=duration, ref=resource_id)
                    return pressed
            return absent

        target = self.client.u2(resourceId=resource_id)
        if not target.exists(timeout=0.5):
            return absent
        target.long_click(duration=duration)
        time.sleep(0.3)
        self._record_operation('long_press', element=resource_id, duration=duration, ref=resource_id)
        return pressed
    except Exception as e:
        return {"success": False, "message": f"❌ 长按失败: {e}"}
1446
+
1447
+ # ==================== 输入操作 ====================
1448
+
1449
def input_text_by_id(self, resource_id: str, text: str) -> Dict:
    """Type ``text`` into the input field identified by ``resource_id``.

    Android strategy:
        1. Locate by ``resourceId``.
        2. Exactly one match: type into it.
        3. Two to five matches: type into the first one whose info reports
           ``editable`` or ``focusable``; otherwise into the first match.
        4. No match, or more than five (the id is treated as unreliable):
           fall back to ``android.widget.EditText`` and, when there are
           several, pick the one with the smallest ``top`` bound.

    On iOS the element is looked up by ``id`` and then by ``name``.

    Args:
        resource_id: Resource id (Android) or accessibility id/name (iOS).
        text: Text to enter.

    Returns:
        A dict with ``success`` and ``message``.
    """
    def _type_into(target, label: str, note: str = "") -> Dict:
        # Shared tail of every success path: type, settle, record, report.
        target.set_text(text)
        time.sleep(0.3)
        self._record_operation('input', element=label, ref=label, text=text)
        return {"success": True, "message": f"✅ 输入成功: '{text}'{note}"}

    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if ios_client and hasattr(ios_client, 'wda'):
                field = ios_client.wda(id=resource_id)
                if not field.exists:
                    field = ios_client.wda(name=resource_id)
                if field.exists:
                    return _type_into(field, resource_id)
            return {"success": False, "message": f"❌ 输入框不存在: {resource_id}"}

        matches = self.client.u2(resourceId=resource_id)
        if matches.exists(timeout=0.5):
            count = matches.count

            if count == 1:
                return _type_into(matches, resource_id)

            if count <= 5:
                for index in range(count):
                    # A stale or unreadable candidate must not abort the scan,
                    # but KeyboardInterrupt/SystemExit must still propagate.
                    try:
                        candidate = matches[index]
                        details = candidate.info
                        if details.get('editable') or details.get('focusable'):
                            return _type_into(candidate, resource_id)
                    except Exception:
                        continue
                # Nothing looked editable: fall back to the first match.
                return _type_into(matches[0], resource_id)

        # The id is missing or too ambiguous: locate by widget class instead.
        edit_texts = self.client.u2(className='android.widget.EditText')
        if edit_texts.exists(timeout=0.5):
            et_count = edit_texts.count
            if et_count == 1:
                return _type_into(edit_texts, 'EditText', " (通过 EditText 定位)")

            # Several fields: prefer the top-most one. An infinite sentinel
            # lets fields with a missing or very large `top` still be chosen.
            topmost = None
            topmost_y = float('inf')
            for index in range(et_count):
                try:
                    candidate = edit_texts[index]
                    top = candidate.info.get('bounds', {}).get('top', float('inf'))
                except Exception:
                    continue
                if topmost is None or top < topmost_y:
                    topmost, topmost_y = candidate, top

            if topmost is not None:
                return _type_into(topmost, 'EditText', " (通过 EditText 定位,选择最顶部的)")

        return {"success": False, "message": f"❌ 输入框不存在: {resource_id}"}

    except Exception as e:
        return {"success": False, "message": f"❌ 输入失败: {e}"}
1538
+
1539
def input_at_coords(self, x: int, y: int, text: str) -> Dict:
    """Tap a coordinate to focus it, then type ``text``.

    Intended for screens without a usable element tree (the original
    docstring mentions games). The coordinates are used as given; no
    screenshot-to-screen conversion is applied.

    Args:
        x: X coordinate to tap.
        y: Y coordinate to tap.
        text: Text to send after the tap.

    Returns:
        A dict with ``success`` and ``message``. On iOS without an
        initialised WDA client a failure is returned instead of reporting a
        success for an input that never happened.
    """
    try:
        ios_client = None
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}

        # Tap first so the target receives focus, then read the screen size
        # (only needed for the percentage values recorded below).
        if ios_client is not None:
            ios_client.wda.click(x, y)
            size = ios_client.wda.window_size()
            screen_width, screen_height = size[0], size[1]
        else:
            self.client.u2.click(x, y)
            info = self.client.u2.info
            screen_width = info.get('displayWidth', 0)
            screen_height = info.get('displayHeight', 0)

        time.sleep(0.3)

        # Type the text into whatever gained focus.
        if ios_client is not None:
            ios_client.wda.send_keys(text)
        else:
            self.client.u2.send_keys(text)

        time.sleep(0.3)

        x_percent = round(x / screen_width * 100, 1) if screen_width > 0 else 0
        y_percent = round(y / screen_height * 100, 1) if screen_height > 0 else 0

        self._record_operation(
            'input',
            x=x,
            y=y,
            x_percent=x_percent,
            y_percent=y_percent,
            ref=f"coords_{x}_{y}",
            text=text,
        )

        return {"success": True, "message": f"✅ 输入成功: ({x}, {y}) [相对位置: {x_percent}%, {y_percent}%] -> '{text}'"}
    except Exception as e:
        return {"success": False, "message": f"❌ 输入失败: {e}"}
1587
+
1588
+ # ==================== 导航操作 ====================
1589
+
1590
async def swipe(self, direction: str) -> Dict:
    """Swipe across the screen in one of four directions.

    The gesture runs along the screen's centre line between the 20% and 80%
    marks of the relevant axis; ``'up'`` moves from 80% of the height to
    20%, ``'left'`` from 80% of the width to 20%, and so on.

    Args:
        direction: One of ``'up'``, ``'down'``, ``'left'``, ``'right'``.

    Returns:
        A dict with ``success`` and ``message``.
    """
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            dims = ios_client.wda.window_size()
            width, height = dims[0], dims[1]
        else:
            width, height = self.client.u2.window_size()

        mid_x, mid_y = width // 2, height // 2
        low_x, high_x = int(width * 0.2), int(width * 0.8)
        low_y, high_y = int(height * 0.2), int(height * 0.8)

        # direction -> (start_x, start_y, end_x, end_y)
        gestures = {
            'up': (mid_x, high_y, mid_x, low_y),
            'down': (mid_x, low_y, mid_x, high_y),
            'left': (high_x, mid_y, low_x, mid_y),
            'right': (low_x, mid_y, high_x, mid_y),
        }

        path = gestures.get(direction)
        if path is None:
            return {"success": False, "message": f"❌ 不支持的方向: {direction}"}

        x1, y1, x2, y2 = path
        if self._is_ios():
            ios_client.wda.swipe(x1, y1, x2, y2)
        else:
            self.client.u2.swipe(x1, y1, x2, y2, duration=0.5)

        self._record_operation('swipe', direction=direction)

        return {"success": True, "message": f"✅ 滑动成功: {direction}"}
    except Exception as e:
        return {"success": False, "message": f"❌ 滑动失败: {e}"}
1627
+
1628
async def press_key(self, key: str) -> Dict:
    """Press a named key.

    Android sends the mapped keycode through ``input keyevent``. iOS
    supports only ``enter`` (sent as a newline) and ``home``; any other key,
    including ``back``, is reported as unsupported rather than being
    acknowledged without any action taking place.

    Args:
        key: Key name, case-insensitive. Android accepts ``enter``/``回车``,
            ``search``/``搜索``, ``back``/``返回`` and ``home``.

    Returns:
        A dict with ``success`` and ``message``.
    """
    key_map = {
        'enter': 66, '回车': 66,
        'search': 84, '搜索': 84,
        'back': 4, '返回': 4,
        'home': 3,
    }
    # Only keys that actually trigger an action on iOS are listed here.
    ios_key_map = {'enter': 'return', '回车': 'return', 'home': 'home'}

    try:
        if self._is_ios():
            ios_key = ios_key_map.get(key.lower())
            if not ios_key:
                return {"success": False, "message": f"❌ iOS 不支持: {key}"}

            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}

            if ios_key == 'return':
                ios_client.wda.send_keys('\n')
            else:
                ios_client.wda.home()
            return {"success": True, "message": f"✅ 按键成功: {key}"}

        keycode = key_map.get(key.lower())
        if keycode:
            self.client.u2.shell(f'input keyevent {keycode}')
            self._record_operation('press_key', key=key)
            return {"success": True, "message": f"✅ 按键成功: {key}"}
        return {"success": False, "message": f"❌ 不支持的按键: {key}"}
    except Exception as e:
        return {"success": False, "message": f"❌ 按键失败: {e}"}
1660
+
1661
def wait(self, seconds: float) -> Dict:
    """Block the calling thread for ``seconds`` seconds, then report it.

    Args:
        seconds: How long to sleep.

    Returns:
        A success dict whose message states the waited duration.
    """
    time.sleep(seconds)
    outcome = dict(success=True, message=f"✅ 已等待 {seconds} 秒")
    return outcome
1665
+
1666
+ # ==================== 应用管理 ====================
1667
+
1668
async def launch_app(self, package_name: str) -> Dict:
    """Launch an application and give it two seconds to come up.

    Args:
        package_name: Android package name, or iOS bundle id.

    Returns:
        A dict with ``success`` and ``message``. On iOS without an
        initialised WDA client a failure is returned instead of reporting a
        launch that never happened.
    """
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.app_activate(package_name)
        else:
            self.client.u2.app_start(package_name)

        # Non-blocking pause so the first screen has time to render.
        await asyncio.sleep(2)

        self._record_operation('launch_app', package_name=package_name)

        return {
            "success": True,
            "message": f"✅ 已启动: {package_name}\n💡 建议等待 2-3 秒让页面加载",
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 启动失败: {e}"}
1688
+
1689
def terminate_app(self, package_name: str) -> Dict:
    """Terminate a running application.

    Args:
        package_name: Android package name, or iOS bundle id.

    Returns:
        A dict with ``success`` and ``message``. On iOS without an
        initialised WDA client a failure is returned instead of reporting a
        termination that never happened.
    """
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            ios_client.wda.app_terminate(package_name)
        else:
            self.client.u2.app_stop(package_name)
        return {"success": True, "message": f"✅ 已终止: {package_name}"}
    except Exception as e:
        return {"success": False, "message": f"❌ 终止失败: {e}"}
1701
+
1702
def list_apps(self, filter_keyword: str = "") -> Dict:
    """List installed applications, optionally filtered by keyword.

    Args:
        filter_keyword: Case-insensitive substring to filter package names
            by; an empty string disables filtering.

    Returns:
        On Android, a dict with ``success``, ``apps`` (at most 50 entries)
        and ``count`` (the number of matches before truncation). On iOS,
        listing is not supported: an empty list plus a hint is returned, or
        a failure dict when the WDA client is not initialised. A dict is
        returned on every path.
    """
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                # Previously this path fell through and returned None.
                return {"success": False, "message": "❌ iOS 客户端未初始化"}
            return {
                "success": True,
                "apps": [],
                "count": 0,
                "message": "💡 iOS 暂不支持列出所有应用,请直接使用 bundle_id 启动",
            }

        apps = self.client.u2.app_list()
        if filter_keyword:
            needle = filter_keyword.lower()
            apps = [app for app in apps if needle in app.lower()]
        return {
            "success": True,
            "apps": apps[:50],  # cap the payload size
            "count": len(apps),
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 获取应用列表失败: {e}"}
1726
+
1727
+ # ==================== 设备管理 ====================
1728
+
1729
def list_devices(self) -> Dict:
    """List connected devices for the active platform.

    The platform-specific device manager is imported lazily, instantiated
    and asked for its device list.

    Returns:
        A dict with ``success``, ``platform`` (``"ios"`` or ``"android"``),
        ``devices`` and ``count``; or a failure dict with ``message``.
    """
    try:
        if self._is_ios():
            platform = "ios"
            from .ios_device_manager_wda import IOSDeviceManagerWDA as manager_cls
        else:
            platform = "android"
            from .device_manager import DeviceManager as manager_cls

        devices = manager_cls().list_devices()

        return {
            "success": True,
            "platform": platform,
            "devices": devices,
            "count": len(devices),
        }
    except Exception as e:
        return {"success": False, "message": f"❌ 获取设备列表失败: {e}"}
1751
+
1752
def check_connection(self) -> Dict:
    """Report whether a device is reachable.

    On iOS the check is whether a client exposing ``wda`` is available; on
    Android ``device_info`` is read and the brand and model are included in
    the result.

    Returns:
        A dict with ``success`` and ``connected``; plus ``platform`` (and
        ``device`` on Android) when connected, or ``message`` on failure.
    """
    try:
        if not self._is_ios():
            details = self.client.u2.device_info
            label = f"{details.get('brand', '')} {details.get('model', '')}"
            return {
                "success": True,
                "connected": True,
                "platform": "android",
                "device": label,
            }

        ios_client = self._get_ios_client()
        if ios_client and hasattr(ios_client, 'wda'):
            return {"success": True, "connected": True, "platform": "ios"}
        return {"success": False, "connected": False, "message": "❌ iOS 未连接"}
    except Exception as e:
        return {"success": False, "connected": False, "message": f"❌ 连接检查失败: {e}"}
1770
+
1771
+ # ==================== 辅助工具 ====================
1772
+
1773
def list_elements(self) -> List[Dict]:
    """List the interactive elements of the current screen.

    On Android the dumped hierarchy is parsed with ``self.client.xml_parser``
    and only elements flagged ``clickable`` or ``focusable`` are kept. On
    iOS the call is delegated to the client's ``list_elements`` when it has
    one.

    Returns:
        A list of element dicts (``resource_id``, ``text``,
        ``content_desc``, ``bounds``, ``clickable``) on Android, whatever
        the iOS client returns on iOS, or a single-item list holding an
        ``error`` entry when listing is unavailable or fails.
    """
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if ios_client and hasattr(ios_client, 'list_elements'):
                return ios_client.list_elements()
            return [{"error": "iOS 暂不支持元素列表,建议使用截图"}]

        hierarchy = self.client.u2.dump_hierarchy()
        parsed = self.client.xml_parser.parse(hierarchy)

        return [
            {
                'resource_id': node.get('resource_id', ''),
                'text': node.get('text', ''),
                'content_desc': node.get('content_desc', ''),
                'bounds': node.get('bounds', ''),
                'clickable': node.get('clickable', False),
            }
            for node in parsed
            if node.get('clickable') or node.get('focusable')
        ]
    except Exception as e:
        return [{"error": f"获取元素失败: {e}"}]
1798
+
1799
def find_close_button(self) -> Dict:
    """Locate the most likely popup close button without clicking it.

    The dumped UI hierarchy is scanned and every node is scored:

    * 100 - the (whitespace-stripped) text is exactly one of the known
      close labels;
    * 90  - ``content-desc`` contains a close keyword;
    * 40-95 - a small clickable node, scored by how close it sits to the
      upper-right / upper-left corner of the screen.

    Returns:
        On success a dict with ``best_candidate`` (pixel centre, percentage
        position, bounds, size, score), a ready-to-use ``click_command`` and
        up to three ``other_candidates``. When nothing matches, a compressed
        screenshot is taken and its path/sizes are returned with
        ``success: False`` so the caller can analyse the image. iOS and any
        raised exception also yield ``success: False``.
    """
    try:
        import re
        import xml.etree.ElementTree as ET

        if self._is_ios():
            return {"success": False, "message": "iOS 暂不支持,请使用截图+坐标点击"}

        # Read the device info once and take both dimensions from it.
        device_info = self.client.u2.info
        screen_width = device_info.get('displayWidth', 720)
        screen_height = device_info.get('displayHeight', 1280)

        root = ET.fromstring(self.client.u2.dump_hierarchy())

        close_texts = ['×', 'X', 'x', '关闭', '取消', 'close', 'Close', '跳过', '知道了', '我知道了']
        desc_keywords = ['关闭', 'close', 'dismiss', '跳过']

        # Loop invariants: bounds pattern and the "small icon" size window.
        bounds_pattern = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')
        min_size = max(20, int(screen_width * 0.03))
        max_size = max(120, int(screen_width * 0.12))

        candidates = []
        for elem in root.iter():
            attrs = elem.attrib
            bounds_str = attrs.get('bounds', '')
            if not bounds_str:
                continue
            match = bounds_pattern.match(bounds_str)
            if not match:
                continue

            # Strip so that labels padded with whitespace still match exactly.
            text = attrs.get('text', '').strip()
            content_desc = attrs.get('content-desc', '')
            class_name = attrs.get('class', '')
            clickable = attrs.get('clickable', 'false') == 'true'

            x1, y1, x2, y2 = map(int, match.groups())
            width = x2 - x1
            height = y2 - y1
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            rel_x = center_x / screen_width
            rel_y = center_y / screen_height

            score = 0
            reason = ""
            if text in close_texts:
                score = 100
                reason = f"文本='{text}'"
            elif any(kw in content_desc.lower() for kw in desc_keywords):
                score = 90
                reason = f"描述='{content_desc}'"
            elif (clickable
                  and min_size <= width <= max_size
                  and min_size <= height <= max_size):
                if rel_x > 0.6 and rel_y < 0.5:
                    # Upper-right corner is the most common place for an X.
                    score = 70 + (rel_x - 0.6) * 50 + (0.5 - rel_y) * 50
                    reason = f"右上角小元素 {width}x{height}px"
                elif rel_x < 0.4 and rel_y < 0.5:
                    score = 60 + (0.4 - rel_x) * 50 + (0.5 - rel_y) * 50
                    reason = f"左上角小元素 {width}x{height}px"
                elif 'Image' in class_name:
                    score = 50
                    reason = f"图片元素 {width}x{height}px"
                else:
                    score = 40
                    reason = f"小型可点击元素 {width}x{height}px"

            if score > 0:
                candidates.append({
                    'score': score,
                    'reason': reason,
                    'bounds': bounds_str,
                    'center_x': center_x,
                    'center_y': center_y,
                    'x_percent': round(rel_x * 100, 1),
                    'y_percent': round(rel_y * 100, 1),
                    'size': f"{width}x{height}",
                })

        if not candidates:
            # Nothing in the element tree: hand a screenshot to the caller.
            screenshot_result = self.take_screenshot(description="找关闭按钮", compress=True)
            return {
                "success": False,
                "message": "❌ 元素树未找到关闭按钮,已截图供 AI 分析",
                "screenshot": screenshot_result.get("screenshot_path", ""),
                "screen_size": {"width": screen_width, "height": screen_height},
                "image_size": {
                    "width": screenshot_result.get("image_width"),
                    "height": screenshot_result.get("image_height"),
                },
                "original_size": {
                    "width": screenshot_result.get("original_img_width"),
                    "height": screenshot_result.get("original_img_height"),
                },
                "tip": "请分析截图找到 X 关闭按钮,然后调用 mobile_click_by_percent(x_percent, y_percent)",
            }

        # Stable sort: equal scores keep their hierarchy order.
        candidates.sort(key=lambda c: c['score'], reverse=True)
        best = candidates[0]

        return {
            "success": True,
            "message": "✅ 找到可能的关闭按钮",
            "best_candidate": {
                "reason": best['reason'],
                "center": {"x": best['center_x'], "y": best['center_y']},
                "percent": {"x": best['x_percent'], "y": best['y_percent']},
                "bounds": best['bounds'],
                "size": best['size'],
                "score": best['score'],
            },
            "click_command": f"mobile_click_by_percent({best['x_percent']}, {best['y_percent']})",
            "other_candidates": [
                {"reason": c['reason'], "percent": f"({c['x_percent']}%, {c['y_percent']}%)", "score": c['score']}
                for c in candidates[1:4]
            ],
            "screen_size": {"width": screen_width, "height": screen_height},
        }

    except Exception as e:
        return {"success": False, "message": f"❌ 查找关闭按钮失败: {e}"}
1945
+
1946
def close_popup(self) -> Dict:
    """Detect a popup, pick its most likely close button and click it.

    Steps:
        1. Detect the popup region: a container-like node (Layout / View /
           Dialog / Card / Container) that is not full screen, covers between
           8% and 90% of the screen and starts below y=50. The matching node
           that appears last in the hierarchy is used.
        2. Score every node whose centre lies inside the popup (extended by
           200 px above, 50 px left/right, 30 px below): exact close text,
           close keyword in ``content-desc``, small clickable node, small
           non-clickable image. Nodes near the popup edge, floating just
           outside it, and later in the hierarchy get bonuses.
        3. Click the best candidate, take a screenshot and record the click.
        4. With no candidate but a detected popup, click a few usual
           close-button positions around the popup; with no popup at all,
           return a screenshot for visual analysis.

    Returns:
        A result dict. ``success: True`` only means a click was issued; the
        caller is expected to check the returned screenshot. iOS and raised
        exceptions yield ``success: False``.
    """
    try:
        import re
        import time
        import xml.etree.ElementTree as ET

        if self._is_ios():
            return {"success": False, "message": "iOS 暂不支持,请使用截图+坐标点击"}

        # Read the device info once and take both dimensions from it.
        device_info = self.client.u2.info
        screen_width = device_info.get('displayWidth', 720)
        screen_height = device_info.get('displayHeight', 1280)
        screen_area = screen_width * screen_height

        xml_string = self.client.u2.dump_hierarchy()

        close_texts = ['×', 'X', 'x', '关闭', '取消', 'close', 'Close', 'CLOSE', '跳过', '知道了']
        close_desc_keywords = ['关闭', 'close', 'dismiss', 'cancel', '跳过']
        container_keywords = ['Layout', 'View', 'Dialog', 'Card', 'Container']

        # Loop invariants.
        bounds_pattern = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')
        clickable_min = max(20, int(screen_width * 0.03))
        clickable_max = max(120, int(screen_width * 0.15))
        image_min = max(15, int(screen_width * 0.02))
        image_max = max(120, int(screen_width * 0.12))

        # Search margins around the detected popup, in pixels.
        margin_top = 200
        margin_side = 50
        margin_bottom = 30

        close_candidates = []
        popup_bounds = None

        try:
            root = ET.fromstring(xml_string)
            all_elements = list(root.iter())
            total = len(all_elements)

            # ----- Step 1: detect the popup region -----
            popup_containers = []
            for idx, elem in enumerate(all_elements):
                bounds_str = elem.attrib.get('bounds', '')
                if not bounds_str:
                    continue
                match = bounds_pattern.match(bounds_str)
                if not match:
                    continue

                class_name = elem.attrib.get('class', '')
                x1, y1, x2, y2 = map(int, match.groups())
                width = x2 - x1
                height = y2 - y1
                area = width * height
                area_ratio = area / screen_area

                is_container = any(kw in class_name for kw in container_keywords)
                is_not_fullscreen = (width < screen_width * 0.98 or height < screen_height * 0.98)
                is_reasonable_size = 0.08 < area_ratio < 0.9
                is_below_statusbar = y1 > 50  # skip the status-bar strip

                if is_container and is_not_fullscreen and is_reasonable_size and is_below_statusbar:
                    popup_containers.append({
                        'bounds': (x1, y1, x2, y2),
                        'bounds_str': bounds_str,
                        'area': area,
                        'area_ratio': area_ratio,
                        'idx': idx,
                        'class': class_name,
                    })

            if popup_containers:
                # Latest node in document order wins (treated as top-most);
                # idx is unique, so the area term is only a formal tie-breaker.
                popup_containers.sort(
                    key=lambda c: (c['idx'], -abs(c['area_ratio'] - 0.3)),
                    reverse=True,
                )
                popup_bounds = popup_containers[0]['bounds']

            # ----- Step 2: score close-button candidates -----
            for idx, elem in enumerate(all_elements):
                attrs = elem.attrib
                bounds_str = attrs.get('bounds', '')
                if not bounds_str:
                    continue
                match = bounds_pattern.match(bounds_str)
                if not match:
                    continue

                text = attrs.get('text', '')
                content_desc = attrs.get('content-desc', '')
                class_name = attrs.get('class', '')
                clickable = attrs.get('clickable', 'false') == 'true'

                x1, y1, x2, y2 = map(int, match.groups())
                width = x2 - x1
                height = y2 - y1
                center_x = (x1 + x2) // 2
                center_y = (y1 + y2) // 2

                popup_edge_bonus = 0
                is_floating_close = False
                if popup_bounds:
                    px1, py1, px2, py2 = popup_bounds
                    in_popup = (px1 - margin_side <= center_x <= px2 + margin_side and
                                py1 - margin_top <= center_y <= py2 + margin_bottom)
                    if not in_popup:
                        continue

                    popup_mid_x = (px1 + px2) / 2
                    if py1 - margin_top < center_y < py1:
                        # Floating above the popup, over its right half.
                        is_floating_close = center_x > popup_mid_x
                    elif py2 < center_y < py2 + margin_top:
                        # Floating below the popup, horizontally within it.
                        is_floating_close = abs(center_x - popup_mid_x) < (px2 - px1) / 2

                    # Nodes within 100 px of a popup edge get up to +3.
                    min_dist = min(abs(center_y - py1), abs(center_y - py2),
                                   abs(center_x - px1), abs(center_x - px2))
                    if min_dist < 100:
                        popup_edge_bonus = 3.0 * (1 - min_dist / 100)
                    if is_floating_close:
                        popup_edge_bonus += 5.0

                rel_x = center_x / screen_width
                rel_y = center_y / screen_height

                score = 0
                match_type = ""

                if text in close_texts:
                    score = 15.0 + popup_edge_bonus
                    match_type = f"text='{text}'"
                elif any(kw in content_desc.lower() for kw in close_desc_keywords):
                    score = 12.0 + popup_edge_bonus
                    match_type = f"desc='{content_desc}'"
                elif clickable:
                    if (clickable_min <= width <= clickable_max
                            and clickable_min <= height <= clickable_max):
                        base_score = 8.0
                        if is_floating_close:
                            base_score = 12.0
                            match_type = "floating_close"
                        elif 'Image' in class_name:
                            # Fold the image bonus into the base score so the
                            # final sum below keeps it instead of discarding it.
                            base_score += 2.0
                            match_type = "clickable_image"
                        else:
                            match_type = "clickable"
                        score = (base_score
                                 + self._get_position_score(rel_x, rel_y)
                                 + popup_edge_bonus)
                elif 'Image' in class_name:
                    if image_min <= width <= image_max and image_min <= height <= image_max:
                        score = 5.0 + self._get_position_score(rel_x, rel_y) + popup_edge_bonus
                        match_type = "ImageView"

                if score > 0:
                    # Later nodes get up to +2.
                    score += idx / total * 2.0
                    close_candidates.append({
                        'bounds': bounds_str,
                        'center_x': center_x,
                        'center_y': center_y,
                        'width': width,
                        'height': height,
                        'score': score,
                        'position': self._get_position_name(rel_x, rel_y),
                        'match_type': match_type,
                        'text': text,
                        'content_desc': content_desc,
                        'x_percent': round(rel_x * 100, 1),
                        'y_percent': round(rel_y * 100, 1),
                        'in_popup': popup_bounds is not None,
                    })

        except ET.ParseError:
            # Best effort: an unparsable dump falls through to the
            # screenshot fallback below with no candidates.
            pass

        if not close_candidates:
            if popup_bounds:
                px1, py1, px2, py2 = popup_bounds
                popup_width = px2 - px1
                popup_height = py2 - py1

                offset_x = max(60, int(popup_width * 0.07))
                offset_y_above = max(35, int(popup_height * 0.025))
                offset_y_near = max(45, int(popup_height * 0.03))

                try_positions = [
                    # Inside the popup, hugging its top-right corner.
                    (px2 - offset_x, py1 + offset_y_near, "弹窗右上角"),
                    # Floating just above the top-right corner.
                    (px2 - offset_x, py1 - offset_y_above, "弹窗右上浮"),
                    # Centred below the popup.
                    ((px1 + px2) // 2, py2 + max(50, int(popup_height * 0.04)), "弹窗下方中间"),
                    # Centred above the popup.
                    ((px1 + px2) // 2, py1 - 40, "弹窗正上方"),
                ]

                tried = []
                for try_x, try_y, position_name in try_positions:
                    if 0 <= try_x <= screen_width and 0 <= try_y <= screen_height:
                        self.client.u2.click(try_x, try_y)
                        time.sleep(0.3)
                        tried.append(position_name)

                screenshot_result = self.take_screenshot("尝试关闭后")
                return {
                    "success": True,
                    "message": "✅ 已尝试点击常见关闭按钮位置",
                    # Only the positions that were actually clicked.
                    "tried_positions": tried,
                    "screenshot": screenshot_result.get("screenshot_path", ""),
                    "tip": "请查看截图确认弹窗是否已关闭。如果还在,可手动分析截图找到关闭按钮位置。",
                }

            # No popup region detected: return a screenshot for analysis.
            screenshot_result = self.take_screenshot(description="页面截图", compress=True)
            return {
                "success": False,
                "message": "❌ 未检测到弹窗区域,已截图供 AI 分析",
                "action_required": "请查看截图找到关闭按钮,调用 mobile_click_at_coords 点击",
                "screenshot": screenshot_result.get("screenshot_path", ""),
                "screen_size": {"width": screen_width, "height": screen_height},
                "image_size": {
                    "width": screenshot_result.get("image_width", screen_width),
                    "height": screenshot_result.get("image_height", screen_height),
                },
                "original_size": {
                    "width": screenshot_result.get("original_img_width", screen_width),
                    "height": screenshot_result.get("original_img_height", screen_height),
                },
                "search_areas": ["弹窗右上角", "弹窗正上方", "弹窗下方中间", "屏幕右上角"],
                "time_warning": "⚠️ 截图分析期间弹窗可能自动消失。如果是定时弹窗,建议等待其自动消失。",
            }

        close_candidates.sort(key=lambda c: c['score'], reverse=True)
        best = close_candidates[0]

        self.client.u2.click(best['center_x'], best['center_y'])
        time.sleep(0.5)

        # The screenshot lets the caller judge whether the popup is gone.
        screenshot_result = self.take_screenshot("关闭弹窗后")

        # Record percentages as well so the step replays across resolutions.
        self._record_operation(
            'click',
            x=best['center_x'],
            y=best['center_y'],
            x_percent=best['x_percent'],
            y_percent=best['y_percent'],
            screen_width=screen_width,
            screen_height=screen_height,
            ref=f"close_popup_{best['position']}"
        )

        return {
            "success": True,
            "message": f"✅ 已点击关闭按钮 ({best['position']}): ({best['center_x']}, {best['center_y']})",
            "clicked": {
                "position": best['position'],
                "match_type": best['match_type'],
                "coords": (best['center_x'], best['center_y']),
                "percent": (best['x_percent'], best['y_percent']),
            },
            "screenshot": screenshot_result.get("screenshot_path", ""),
            "popup_detected": popup_bounds is not None,
            "popup_bounds": f"[{popup_bounds[0]},{popup_bounds[1]}][{popup_bounds[2]},{popup_bounds[3]}]" if popup_bounds else None,
            "other_candidates": [
                {
                    "position": c['position'],
                    "type": c['match_type'],
                    "coords": (c['center_x'], c['center_y']),
                    "percent": (c['x_percent'], c['y_percent']),
                }
                for c in close_candidates[1:4]
            ],
            "tip": "请查看截图判断弹窗是否已关闭。如果弹窗还在,可以尝试点击 other_candidates 中的其他位置;如果误点跳转了,请按返回键",
        }

    except Exception as e:
        return {"success": False, "message": f"❌ 关闭弹窗失败: {e}"}
2284
+
2285
+ def _get_position_name(self, rel_x: float, rel_y: float) -> str:
2286
+ """根据相对坐标获取位置名称"""
2287
+ if rel_y < 0.4:
2288
+ if rel_x > 0.6:
2289
+ return "右上角"
2290
+ elif rel_x < 0.4:
2291
+ return "左上角"
2292
+ else:
2293
+ return "顶部中间"
2294
+ elif rel_y > 0.6:
2295
+ if rel_x > 0.6:
2296
+ return "右下角"
2297
+ elif rel_x < 0.4:
2298
+ return "左下角"
2299
+ else:
2300
+ return "底部中间"
2301
+ else:
2302
+ if rel_x > 0.6:
2303
+ return "右侧"
2304
+ elif rel_x < 0.4:
2305
+ return "左侧"
2306
+ else:
2307
+ return "中间"
2308
+
2309
+ def _get_position_score(self, rel_x: float, rel_y: float) -> float:
2310
+ """根据位置计算额外得分(角落位置加分更多)"""
2311
+ # 弹窗关闭按钮常见位置得分:右上角 > 左上角 > 底部中间 > 其他角落
2312
+ if rel_y < 0.4: # 上半部分
2313
+ if rel_x > 0.6: # 右上角
2314
+ return 2.0 + (rel_x - 0.6) + (0.4 - rel_y)
2315
+ elif rel_x < 0.4: # 左上角
2316
+ return 1.5 + (0.4 - rel_x) + (0.4 - rel_y)
2317
+ else: # 顶部中间
2318
+ return 1.0
2319
+ elif rel_y > 0.6: # 下半部分
2320
+ if 0.3 < rel_x < 0.7: # 底部中间
2321
+ return 1.2 + (1 - abs(rel_x - 0.5) * 2)
2322
+ else: # 底部角落
2323
+ return 0.8
2324
+ else: # 中间区域
2325
+ return 0.5
2326
+
2327
def assert_text(self, text: str) -> Dict:
    """Check whether an element carrying exactly ``text`` is on screen.

    Android queries ``self.client.u2(text=...)``. iOS queries the WDA client
    by ``name`` and then by ``label``, and reports "not found" when no WDA
    client is available.

    Returns:
        ``{"success": True, "found": ..., "text": ..., "message": ...}``, or
        ``{"success": False, "message": ...}`` if the lookup raises.
    """
    try:
        if not self._is_ios():
            found = self.client.u2(text=text).exists()
        else:
            ios = self._get_ios_client()
            if ios and hasattr(ios, 'wda'):
                found = ios.wda(name=text).exists or ios.wda(label=text).exists
            else:
                found = False

        verdict = '存在' if found else '不存在'
        return {
            "success": True,
            "found": found,
            "text": text,
            "message": f"✅ 文本'{text}' {verdict}",
        }
    except Exception as exc:
        return {"success": False, "message": f"❌ 断言失败: {exc}"}
2347
+
2348
+ # ==================== 脚本生成 ====================
2349
+
2350
def get_operation_history(self, limit: Optional[int] = None) -> Dict:
    """Return the recorded operations, optionally only the most recent ones.

    Args:
        limit: Maximum number of most-recent operations to return. ``None``,
            ``0`` or a negative value returns the full history. (A negative
            value used to slice ``history[-limit:]``, which silently dropped
            the oldest entries.)

    Returns:
        ``{"success": True, "count": <returned>, "total": <recorded>,
        "operations": [...]}``. ``operations`` is always a new list, so
        callers cannot mutate the recorder's internal list through it.
    """
    recorded = self.operation_history
    if limit is not None and limit > 0:
        operations = recorded[-limit:]
    else:
        operations = list(recorded)
    return {
        "success": True,
        "count": len(operations),
        "total": len(recorded),
        "operations": operations,
    }
2361
+
2362
def clear_operation_history(self) -> Dict:
    """Drop every recorded operation and report how many were removed."""
    removed = len(self.operation_history)
    response = {"success": True, "message": f"✅ 已清空 {removed} 条记录"}
    # Rebind to a fresh list rather than clearing the existing one in place.
    self.operation_history = []
    return response
2367
+
2368
def generate_test_script(self, test_name: str, package_name: str, filename: str) -> Dict:
    """Generate a pytest-style uiautomator2 script from the operation history.

    Locator priority for click / long-press steps: resource id > text (or
    description) > percentage position > raw coordinates. ``launch_app``
    operations are skipped because the script header already starts the app.

    Every recorded value (refs, texts, package name, swipe direction, key) is
    embedded with ``repr()`` so quotes or backslashes in recorded text cannot
    break the generated file. Steps are numbered only when a step is actually
    emitted. Refs of the form ``description:xxx`` / ``desc:xxx`` become
    ``d(description=...)``; other ``prefix:xxx`` refs become ``d(text=...)``.

    Args:
        test_name: Human-readable name, written into the script docstring.
        package_name: Android package started at the top of the script.
        filename: Output file name. Only its final path component is used, so
            the script is always written inside ``tests/``; ``.py`` is
            appended when missing.

    Returns:
        A dict with ``file_path``, ``operations_count`` and a 500-character
        ``preview``, or ``success: False`` when there is no history.
    """
    if not self.operation_history:
        return {"success": False, "message": "❌ 没有操作历史,请先执行一些操作"}

    def one_line(value) -> str:
        """Collapse line breaks so the value cannot escape a ``#`` comment."""
        return ' '.join(str(value).splitlines())

    def locate(op):
        """Pick the most stable locator for a click / long-press operation.

        Returns ``(kind, target, label)`` where kind is ``'id'``, ``'text'``,
        ``'percent'`` or ``'coords'``, or ``None`` when nothing is usable.
        """
        ref = op.get('ref', '') or ''
        positional = ref.startswith(('coords_', 'coords:', 'percent_'))
        if ref and (':id/' in ref or ref.startswith('com.')):
            return 'id', f"d(resourceId={ref!r})", ''
        if ref and not positional:
            prefix, sep, value = ref.partition(':')
            if not sep:
                return 'text', f"d(text={ref!r})", ref
            if prefix in ('description', 'desc'):
                return 'text', f"d(description={value!r})", value
            return 'text', f"d(text={value!r})", value
        if 'x_percent' in op and 'y_percent' in op:
            return 'percent', (op['x_percent'], op['y_percent']), ''
        if 'x' in op and 'y' in op:
            return 'coords', (op['x'], op['y']), ''
        return None

    # Keep the name on one docstring line and unable to close the docstring.
    doc_title = one_line(test_name).replace('\\', '\\\\').replace('"', "'")

    script_lines = [
        "#!/usr/bin/env python3",
        "# -*- coding: utf-8 -*-",
        '"""',
        f"测试用例: {doc_title}",
        f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "定位策略(按优先级):",
        "1. ID 定位 - 最稳定,跨设备兼容",
        "2. 文本定位 - 稳定,跨设备兼容",
        "3. 百分比定位 - 跨分辨率兼容(坐标自动转换)",
        '"""',
        "import time",
        "import uiautomator2 as u2",
        "",
        f"PACKAGE_NAME = {str(package_name)!r}",
        "",
        "# === 配置(根据 App 情况调整)===",
        "LAUNCH_WAIT = 3  # 启动后等待时间(秒)",
        "CLOSE_AD_ON_LAUNCH = True  # 是否尝试关闭启动广告",
        "AD_CLOSE_KEYWORDS = ['关闭', '跳过', 'Skip', 'Close', '×', 'X', '我知道了', '稍后再说']",
        "",
        "",
        "def smart_wait(d, seconds=1):",
        '    """等待页面稳定"""',
        "    time.sleep(seconds)",
        "",
        "",
        "def close_ad_if_exists(d, quick=False):",
        '    """尝试关闭广告弹窗(quick=True 时只检查常见的)"""',
        "    keywords = AD_CLOSE_KEYWORDS[:3] if quick else AD_CLOSE_KEYWORDS",
        "    for keyword in keywords:",
        "        elem = d(textContains=keyword)",
        "        if elem.exists(timeout=0.3):  # 缩短超时",
        "            try:",
        "                elem.click()",
        "                print(f'  📢 关闭广告: {keyword}')",
        "                time.sleep(0.3)",
        "                return True",
        "            except Exception:",
        "                pass",
        "    return False",
        "",
        "",
        "def safe_click(d, selector, timeout=3):",
        '    """安全点击(带等待)"""',
        "    try:",
        "        if selector.exists(timeout=timeout):",
        "            selector.click()",
        "            return True",
        "        return False",
        "    except Exception as e:",
        "        print(f'  ⚠️ 点击失败: {e}')",
        "        return False",
        "",
        "",
        "def click_by_percent(d, x_percent, y_percent):",
        '    """',
        "    百分比点击(跨分辨率兼容)",
        "",
        "    原理:屏幕左上角 (0%, 0%),右下角 (100%, 100%)",
        "    优势:同样的百分比在不同分辨率设备上都能点到相同相对位置",
        '    """',
        "    info = d.info",
        "    width = info.get('displayWidth', 0)",
        "    height = info.get('displayHeight', 0)",
        "    x = int(width * x_percent / 100)",
        "    y = int(height * y_percent / 100)",
        "    d.click(x, y)",
        "    return True",
        "",
        "",
        "def long_press_by_percent(d, x_percent, y_percent, duration=1.0):",
        '    """',
        "    百分比长按(跨分辨率兼容)",
        "",
        "    原理:屏幕左上角 (0%, 0%),右下角 (100%, 100%)",
        "    优势:同样的百分比在不同分辨率设备上都能长按到相同相对位置",
        '    """',
        "    info = d.info",
        "    width = info.get('displayWidth', 0)",
        "    height = info.get('displayHeight', 0)",
        "    x = int(width * x_percent / 100)",
        "    y = int(height * y_percent / 100)",
        "    d.long_click(x, y, duration=duration)",
        "    return True",
        "",
        "",
        "def test_main():",
        "    # 连接设备",
        "    d = u2.connect()",
        "    d.implicitly_wait(10)  # 设置全局等待",
        "",
        "    # 启动应用",
        "    d.app_start(PACKAGE_NAME)",
        "    time.sleep(LAUNCH_WAIT)  # 等待启动(可调整)",
        "",
        "    # 尝试关闭启动广告(可选,根据 App 情况调整)",
        "    if CLOSE_AD_ON_LAUNCH:",
        "        close_ad_if_exists(d)",
        "",
    ]

    step_num = 0
    for op in self.operation_history:
        action = op.get('action')
        n = step_num + 1  # committed only if this operation emits a step

        if action in ('click', 'long_press'):
            target = locate(op)
            if target is None:
                continue  # nothing usable to locate the element
            kind, where, label = target
            is_click = action == 'click'
            verb = '点击' if is_click else '长按'
            element = op.get('element', '')
            desc = f" ({one_line(element)})" if element else ""
            duration = op.get('duration', 1.0)
            origin = f"({op.get('x', '?')}, {op.get('y', '?')})"

            if kind in ('id', 'text'):
                if kind == 'id':
                    comment = f"{verb}元素 (ID定位,最稳定)"
                else:
                    comment = f"{verb}文本 '{one_line(label)}' (文本定位)"
                if is_click:
                    call = f"safe_click(d, {where})"
                else:
                    call = f"{where}.long_click(duration={duration})"
            elif kind == 'percent':
                x_pct, y_pct = where
                comment = f"{verb}位置{desc} (百分比定位,跨分辨率兼容)"
                if is_click:
                    call = f"click_by_percent(d, {x_pct}, {y_pct})  # 原坐标: {origin}"
                else:
                    call = (f"long_press_by_percent(d, {x_pct}, {y_pct}, "
                            f"duration={duration})  # 原坐标: {origin}")
            else:
                x, y = where
                comment = f"{verb}坐标{desc} (⚠️ 坐标定位,可能不兼容其他分辨率)"
                if is_click:
                    call = f"d.click({x}, {y})"
                else:
                    call = f"d.long_click({x}, {y}, duration={duration})"

            lines = [
                f"    # 步骤{n}: {comment}",
                f"    {call}",
                "    time.sleep(0.5)  # 等待响应",
                "",
            ]

        elif action == 'input':
            text = str(op.get('text', ''))
            ref = op.get('ref', '') or ''
            has_coords = 'x' in op and 'y' in op
            has_percent = 'x_percent' in op and 'y_percent' in op
            positional = ref.startswith(('coords_', 'coords:', 'percent_'))
            looks_like_id = ':id/' in ref or ref.startswith('com.')

            if ref and not positional and (looks_like_id or not has_coords):
                lines = [
                    f"    # 步骤{n}: 输入文本 '{one_line(text)}' (ID定位)",
                    f"    d(resourceId={ref!r}).set_text({text!r})",
                ]
            elif has_percent:
                lines = [
                    f"    # 步骤{n}: 点击后输入 (百分比定位)",
                    f"    click_by_percent(d, {op['x_percent']}, {op['y_percent']})",
                    "    time.sleep(0.3)",
                    f"    d.send_keys({text!r})",
                ]
            elif has_coords:
                lines = [
                    f"    # 步骤{n}: 点击坐标后输入 (⚠️ 可能不兼容其他分辨率)",
                    f"    d.click({op['x']}, {op['y']})",
                    "    time.sleep(0.3)",
                    f"    d.send_keys({text!r})",
                ]
            else:
                continue  # unrecognised target, skip
            lines += ["    time.sleep(0.5)", ""]

        elif action == 'swipe':
            direction = str(op.get('direction', 'up'))
            lines = [
                f"    # 步骤{n}: 滑动 {one_line(direction)}",
                f"    d.swipe_ext({direction!r})",
                "    time.sleep(0.5)",
                "",
            ]

        elif action == 'press_key':
            key = str(op.get('key', 'enter'))
            lines = [
                f"    # 步骤{n}: 按键 {one_line(key)}",
                f"    d.press({key!r})",
                "    time.sleep(0.5)",
                "",
            ]

        else:
            # launch_app (handled by the header) and unknown actions.
            continue

        step_num = n
        script_lines.extend(lines)

    script_lines.extend([
        "    print('✅ 测试完成')",
        "",
        "",
        "if __name__ == '__main__':",
        "    test_main()",
    ])

    script = '\n'.join(script_lines)

    output_dir = Path("tests")
    output_dir.mkdir(exist_ok=True)

    # Keep only the final path component so the file stays inside tests/.
    filename = Path(filename).name
    if not filename.endswith('.py'):
        filename = f"{filename}.py"

    file_path = output_dir / filename
    file_path.write_text(script, encoding='utf-8')

    return {
        "success": True,
        "file_path": str(file_path),
        "message": f"✅ 脚本已生成: {file_path}",
        "operations_count": len(self.operation_history),
        "preview": script[:500] + "...",
    }
2658
+
2659
+ # ========== 模板匹配功能 ==========
2660
+
2661
def template_match_close(self, screenshot_path: Optional[str] = None, threshold: float = 0.75) -> Dict:
    """Find close buttons in a screenshot via template matching.

    Args:
        screenshot_path: Image to search. When ``None`` an uncompressed
            screenshot is taken first.
        threshold: Match threshold in the 0-1 range passed to the matcher;
            higher is stricter.

    Returns:
        Whatever ``TemplateMatcher.find_close_buttons`` returns, or a
        ``success: False`` dict with an ``error`` message when the screenshot
        fails, the matcher module cannot be imported, or matching raises.
    """
    try:
        from .template_matcher import TemplateMatcher

        image_path = screenshot_path
        if image_path is None:
            shot = self.take_screenshot(description="模板匹配", compress=False)
            image_path = shot.get("screenshot_path")
            if not image_path:
                return {"success": False, "error": "截图失败"}

        return TemplateMatcher().find_close_buttons(image_path, threshold)

    except ImportError:
        return {
            "success": False,
            "error": "需要安装 opencv-python: pip install opencv-python"
        }
    except Exception as exc:
        return {"success": False, "error": f"模板匹配失败: {exc}"}
2696
+
2697
def template_click_close(self, threshold: float = 0.75) -> Dict:
    """Template-match a close button and click the best match in one step.

    Flow: ``template_match_close`` (which takes the screenshot) -> read the
    best match's percentage position -> ``click_by_percent``.

    Args:
        threshold: Match threshold in the 0-1 range, forwarded to
            ``template_match_close``.

    Returns:
        The failed match result unchanged when matching fails; a
        ``success: False`` dict when no coordinates are available or when
        ``click_by_percent`` itself reports ``success: False``; otherwise a
        ``success: True`` dict describing the matched template and the click.
    """
    try:
        match_result = self.template_match_close(threshold=threshold)
        if not match_result.get("success"):
            return match_result

        # Tolerate a missing or None "best_match" / "percent" entry.
        best = match_result.get("best_match") or {}
        percent = best.get("percent") or {}
        x_percent = percent.get("x")
        y_percent = percent.get("y")
        if x_percent is None or y_percent is None:
            return {"success": False, "error": "无法获取匹配坐标"}

        click_result = self.click_by_percent(x_percent, y_percent)

        # Do not report success when the click itself reported a failure.
        if isinstance(click_result, dict) and click_result.get("success") is False:
            return {
                "success": False,
                "error": "模板匹配成功但点击失败",
                "matched_template": best.get("template"),
                "confidence": best.get("confidence"),
                "clicked_position": f"({x_percent}%, {y_percent}%)",
                "click_result": click_result,
            }

        return {
            "success": True,
            "message": "✅ 模板匹配并点击成功",
            "matched_template": best.get("template"),
            "confidence": best.get("confidence"),
            "clicked_position": f"({x_percent}%, {y_percent}%)",
            "click_result": click_result,
        }

    except Exception as e:
        return {"success": False, "error": f"模板点击失败: {e}"}
2737
+
2738
def template_add(self, screenshot_path: str, x: int, y: int,
                 width: int, height: int, template_name: str) -> Dict:
    """Crop a region out of a screenshot and store it as a new template.

    Intended for close-button styles the template library does not yet have.

    Args:
        screenshot_path: Path of the source screenshot.
        x, y: Top-left corner of the crop region.
        width, height: Size of the crop region.
        template_name: Name for the new template (e.g. ``x_circle_gray``).

    Returns:
        The result of ``TemplateMatcher.crop_and_add_template``, or a
        ``success: False`` dict when the matcher cannot be imported or the
        operation raises.
    """
    try:
        from .template_matcher import TemplateMatcher

        crop_args = (screenshot_path, x, y, width, height, template_name)
        return TemplateMatcher().crop_and_add_template(*crop_args)
    except ImportError:
        return {"success": False, "error": "需要安装 opencv-python"}
    except Exception as exc:
        return {"success": False, "error": f"添加模板失败: {exc}"}
2764
+
2765
def template_list(self) -> Dict:
    """List every close-button template known to ``TemplateMatcher``.

    Returns:
        The matcher's listing, or a ``success: False`` dict when the matcher
        cannot be imported or listing raises.
    """
    try:
        from .template_matcher import TemplateMatcher

        return TemplateMatcher().list_templates()
    except ImportError:
        return {"success": False, "error": "需要安装 opencv-python"}
    except Exception as exc:
        return {"success": False, "error": f"列出模板失败: {exc}"}
2776
+
2777
def template_delete(self, template_name: str) -> Dict:
    """Delete the named template through ``TemplateMatcher``.

    Args:
        template_name: Name of the template to remove.

    Returns:
        The matcher's result, or a ``success: False`` dict when the matcher
        cannot be imported or deletion raises.
    """
    try:
        from .template_matcher import TemplateMatcher

        return TemplateMatcher().delete_template(template_name)
    except ImportError:
        return {"success": False, "error": "需要安装 opencv-python"}
    except Exception as exc:
        return {"success": False, "error": f"删除模板失败: {exc}"}
2788
+
2789
def close_ad_popup(self, auto_learn: bool = True) -> Dict:
    """Try to dismiss an ad popup (not supported on iOS).

    Strategies are attempted in this order:

    1. Scan the dumped UI hierarchy for elements that look like a close
       button and tap the best-scoring one.
    2. Run template matching on a fresh screenshot and tap the best match.
    3. Give up and hand back a screenshot so the caller (an AI) can find
       the X button itself.

    Candidate scoring in step 1:

    * +10 when the text contains a close keyword (case-insensitive) or is
      exactly "x"/"X". A lone "x" is matched against the whole text only,
      so ordinary words such as "Next" are not mistaken for a close
      button.
    * +8 when the content-desc contains a close keyword.
    * +5 for a small clickable element in the upper-right area, else +2
      for a small clickable element in the upper part of the screen.
    * Any small clickable element is kept as a candidate with a score of
      at least 1, even without text.

    Args:
        auto_learn: When True, a screenshot is taken before the tap in
            step 1 and passed to ``_auto_learn_template`` together with
            the tapped element's bounds.

    Returns:
        A dict with ``success``, ``method``, ``message`` and
        ``learned_template``. The step-3 fallback additionally carries
        ``screenshot`` and ``need_ai_analysis``, plus
        ``template_match_error`` when template matching raised. On iOS or
        on an unexpected failure a ``{"success": False, "error": ...}``
        dict is returned instead.
    """
    import re
    import time
    import xml.etree.ElementTree as ET

    if self._is_ios():
        return {"success": False, "error": "iOS 暂不支持此功能"}

    result = {
        "success": False,
        "method": None,
        "message": "",
        "learned_template": None
    }

    # Keywords are compared against lower-cased text / content-desc.
    close_keywords = ['关闭', '跳过', '×', 'close', 'skip', '取消']
    close_content_desc = ['关闭', '跳过', 'close', 'skip', 'dismiss']
    bounds_re = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')

    try:
        # ========== Step 1: look for a close button in the hierarchy ==========
        root = ET.fromstring(self.client.u2.dump_hierarchy())

        # Screen size is read from the device at most once, and only if
        # some element actually needs the position heuristic.
        screen_size = None
        close_candidates = []

        for elem in root.iter():
            attrib = elem.attrib
            match = bounds_re.match(attrib.get('bounds', ''))
            if not match:
                continue

            text = attrib.get('text', '').strip()
            content_desc = attrib.get('content-desc', '').strip()
            clickable = attrib.get('clickable', 'false') == 'true'
            resource_id = attrib.get('resource-id', '')

            x1, y1, x2, y2 = map(int, match.groups())
            width = x2 - x1
            height = y2 - y1
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

            score = 0
            reason = ""

            # Text match
            text_lower = text.lower()
            if text_lower == 'x':
                score += 10
                reason = f"文本含'{text}'"
            else:
                for kw in close_keywords:
                    if kw in text_lower:
                        score += 10
                        reason = f"文本含'{kw}'"
                        break

            # content-desc match (overrides the reason of a text match)
            desc_lower = content_desc.lower()
            for kw in close_content_desc:
                if kw in desc_lower:
                    score += 8
                    reason = f"描述含'{kw}'"
                    break

            # Small clickable element: position hints at an X button
            if clickable and 30 < width < 200 and 30 < height < 200:
                if screen_size is None:
                    info = self.client.u2.info
                    screen_size = (info.get('displayWidth', 1440),
                                   info.get('displayHeight', 3200))
                screen_width, screen_height = screen_size

                if cx > screen_width * 0.6 and cy < screen_height * 0.5:
                    score += 5
                    reason = reason or "右上角小按钮"
                elif cy < screen_height * 0.4:
                    score += 2
                    reason = reason or "上部小按钮"

            # Any small clickable element is considered, even without text
            if score > 0 or (clickable and 30 < width < 150 and 30 < height < 150):
                if not reason and clickable:
                    reason = "可点击小按钮"
                close_candidates.append({
                    'score': max(score, 1),
                    'reason': reason,
                    'bounds': (x1, y1, x2, y2),
                    'center': (cx, cy),
                    'resource_id': resource_id,
                    'text': text
                })

        if close_candidates:
            # max() keeps the first of equally scored candidates, like a
            # stable descending sort would.
            best = max(close_candidates, key=lambda cand: cand['score'])
            cx, cy = best['center']
            bounds = best['bounds']

            # Screenshot before the tap, used for auto-learning
            pre_screenshot = None
            if auto_learn:
                pre_result = self.take_screenshot(description="关闭前", compress=False)
                pre_screenshot = pre_result.get("screenshot_path")

            self.click_at_coords(cx, cy)
            time.sleep(0.5)

            result["success"] = True
            result["method"] = "控件树"
            result["message"] = "✅ 通过控件树找到关闭按钮并点击\n" \
                                f" 位置: ({cx}, {cy})\n" \
                                f" 原因: {best['reason']}"

            # Auto-learn: add this X to the template library if it is new
            if auto_learn and pre_screenshot:
                learn_result = self._auto_learn_template(pre_screenshot, bounds)
                if learn_result:
                    result["learned_template"] = learn_result
                    result["message"] += f"\n📚 自动学习: {learn_result}"

            return result

        # ========== Step 2: template matching ==========
        screenshot_path = None
        try:
            from .template_matcher import TemplateMatcher

            screenshot_result = self.take_screenshot(description="模板匹配", compress=False)
            screenshot_path = screenshot_result.get("screenshot_path")

            if screenshot_path:
                matcher = TemplateMatcher()
                match_result = matcher.find_close_buttons(screenshot_path, threshold=0.75)

                # Use the reported best match directly
                if match_result.get("success") and match_result.get("best_match"):
                    best = match_result["best_match"]
                    x_pct = best["percent"]["x"]
                    y_pct = best["percent"]["y"]

                    self.click_by_percent(x_pct, y_pct)
                    time.sleep(0.5)

                    result["success"] = True
                    result["method"] = "模板匹配"
                    result["message"] = "✅ 通过模板匹配找到关闭按钮并点击\n" \
                                        f" 模板: {best.get('template', 'unknown')}\n" \
                                        f" 置信度: {best.get('confidence', 'N/A')}%\n" \
                                        f" 位置: ({x_pct:.1f}%, {y_pct:.1f}%)"
                    return result

        except ImportError:
            pass  # Template matcher (OpenCV) unavailable: skip this step
        except Exception as exc:
            # Still best-effort, but keep the reason visible to the caller
            result["template_match_error"] = str(exc)

        # ========== Step 3: return a screenshot for AI analysis ==========
        if screenshot_path:
            screenshot_info = {"screenshot_path": screenshot_path}
        else:
            screenshot_info = self.take_screenshot(description="需要AI分析", compress=True)

        result["success"] = False
        result["method"] = None
        result["message"] = "❌ 控件树和模板匹配都未找到关闭按钮\n" \
                            "📸 已截图,请 AI 分析图片中的 X 按钮位置\n" \
                            "💡 找到后使用 mobile_click_by_percent(x%, y%) 点击"
        result["screenshot"] = screenshot_info
        result["need_ai_analysis"] = True

        return result

    except Exception as e:
        return {"success": False, "error": f"关闭弹窗失败: {e}"}
2982
+
2983
def _detect_popup_region(self, root) -> tuple:
    """Pick the element of the UI hierarchy that most looks like a popup.

    An element qualifies when it is horizontally inset from both screen
    edges (more than 5% on each side) and is larger than 200x200 while
    staying below 95% of the screen width and 80% of the screen height.
    Qualifying elements are ranked by ``area / 1000 - center_dist / 10``,
    where ``center_dist`` is the Manhattan distance between the element
    centre and the screen centre.

    Args:
        root: Root element of the dumped hierarchy; every element from
            ``root.iter()`` is inspected through its ``bounds`` attribute.

    Returns:
        Bounds ``(x1, y1, x2, y2)`` of the best-scoring element, or None
        when no element qualifies.
    """
    import re

    # Read the device info once; it is used for both dimensions.
    info = self.client.u2.info
    screen_width = info.get('displayWidth', 1440)
    screen_height = info.get('displayHeight', 3200)

    bounds_re = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')

    best_bounds = None
    best_score = None

    for elem in root.iter():
        match = bounds_re.match(elem.attrib.get('bounds', ''))
        if not match:
            continue

        x1, y1, x2, y2 = map(int, match.groups())
        width = x2 - x1
        height = y2 - y1

        is_centered = x1 > screen_width * 0.05 and x2 < screen_width * 0.95
        # The upper size limits already rule out full-screen elements,
        # so no separate full-screen check is needed.
        is_reasonable_size = (200 < width < screen_width * 0.95 and
                              200 < height < screen_height * 0.8)
        if not (is_centered and is_reasonable_size):
            continue

        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        center_dist = abs(center_x - screen_width / 2) + abs(center_y - screen_height / 2)
        score = width * height / 1000 - center_dist / 10

        # Strict comparison keeps the first of equally scored elements.
        if best_score is None or score > best_score:
            best_score = score
            best_bounds = (x1, y1, x2, y2)

    return best_bounds
3041
+
3042
def _auto_learn_template(self, screenshot_path: str, bounds: tuple, threshold: float = 0.6) -> str:
    """Add a tapped X button to the template library unless a similar one exists.

    The button region (with some padding) is cropped out of the
    screenshot and compared against every ``*.png`` in the matcher's
    template directory: the crop is resized to 100x100, each template to
    60x60, and the template is searched inside the crop with
    ``cv2.matchTemplate`` (``TM_CCOEFF_NORMED``). If no template reaches
    ``threshold``, the crop is saved as ``auto_x_<MMDD_HHMMSS>.png``.

    Args:
        screenshot_path: Path of the screenshot taken before the tap.
        bounds: Bounds ``(x1, y1, x2, y2)`` of the X button.
        threshold: Match score at or above which a template counts as
            already present.

    Returns:
        File name of the newly saved template, or None when a similar
        template already exists or anything fails. Failures are swallowed
        on purpose so learning never breaks the caller's main flow.
    """
    try:
        from .template_matcher import TemplateMatcher
        from PIL import Image
        import cv2  # imported up front so a missing OpenCV leaves no temp file
        import time

        x1, y1, x2, y2 = bounds

        # Pad the region a little so the whole button is captured
        padding = max(10, int(max(x2 - x1, y2 - y1) * 0.2))

        # Context manager releases the screenshot's file handle
        with Image.open(screenshot_path) as img:
            crop_box = (
                max(0, x1 - padding),
                max(0, y1 - padding),
                min(img.width, x2 + padding),
                min(img.height, y2 + padding),
            )
            cropped = img.crop(crop_box)
            cropped.load()  # make sure pixel data is read before the file closes

        # Round-trip through a temp file to get a grayscale OpenCV image;
        # the file is removed even when saving or reading fails.
        temp_path = self.screenshot_dir / "temp_new_x.png"
        try:
            cropped.save(str(temp_path))
            new_img = cv2.imread(str(temp_path), cv2.IMREAD_GRAYSCALE)
        finally:
            if temp_path.exists():
                temp_path.unlink()

        if new_img is None:
            return None

        matcher = TemplateMatcher()

        # The new crop is the search area; resize it once, outside the loop
        new_resized = cv2.resize(new_img, (100, 100))

        for template_file in matcher.template_dir.glob("*.png"):
            template = cv2.imread(str(template_file), cv2.IMREAD_GRAYSCALE)
            if template is None:
                continue

            # Smaller template searched inside the larger crop
            template_resized = cv2.resize(template, (60, 60))
            scores = cv2.matchTemplate(new_resized, template_resized, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, _ = cv2.minMaxLoc(scores)

            if max_val >= threshold:
                return None  # a similar template already exists

        # Nothing similar found: store the crop under a timestamped name
        timestamp = time.strftime("%m%d_%H%M%S")
        template_name = f"auto_x_{timestamp}.png"
        cropped.save(str(matcher.template_dir / template_name))
        return template_name

    except Exception:
        return None  # learning failed; do not disturb the main flow
3130
+
3131
def template_add_by_percent(self, x_percent: float, y_percent: float,
                            size: int, template_name: str) -> Dict:
    """Take a screenshot and save a square crop of it as a template.

    The crop is centred on the given percentage position of the fresh
    screenshot and clamped to the image borders.

    Args:
        x_percent: Horizontal position of the X button centre (0-100).
        y_percent: Vertical position of the X button centre (0-100).
        size: Side length of the square crop region in pixels; must be
            at least 2 (the crop extends ``size // 2`` to each side).
        template_name: Template name; the file is saved as
            ``<template_name>.png`` in the matcher's template directory.

    Returns:
        On success a dict with ``success``, ``message``,
        ``template_path``, ``center_percent``, ``center_pixel``,
        ``crop_region`` and ``size``; otherwise
        ``{"success": False, "error": ...}``.
    """
    try:
        # Reject unusable input before touching the device: out-of-range
        # values would only yield an empty or invalid crop later on.
        if not (0 <= x_percent <= 100 and 0 <= y_percent <= 100):
            return {"success": False, "error": "x_percent 和 y_percent 必须在 0-100 之间"}
        if size < 2:
            return {"success": False, "error": "size 必须至少为 2 像素"}

        from .template_matcher import TemplateMatcher
        from PIL import Image

        # Take the screenshot first
        screenshot_result = self.take_screenshot(description="添加模板", compress=False)
        screenshot_path = screenshot_result.get("screenshot_path")

        if not screenshot_path:
            return {"success": False, "error": "截图失败"}

        # Context manager releases the screenshot's file handle
        with Image.open(screenshot_path) as img:
            img_w, img_h = img.size

            # Pixel coordinates of the centre point
            cx = int(img_w * x_percent / 100)
            cy = int(img_h * y_percent / 100)

            # Crop region, clamped to the image
            half = size // 2
            x1 = max(0, cx - half)
            y1 = max(0, cy - half)
            x2 = min(img_w, cx + half)
            y2 = min(img_h, cy + half)

            cropped = img.crop((x1, y1, x2, y2))
            cropped.load()  # make sure pixel data is read before the file closes

        matcher = TemplateMatcher()
        output_path = matcher.template_dir / f"{template_name}.png"
        cropped.save(str(output_path))

        return {
            "success": True,
            "message": f"✅ 模板已保存: {template_name}",
            "template_path": str(output_path),
            "center_percent": f"({x_percent}%, {y_percent}%)",
            "center_pixel": f"({cx}, {cy})",
            "crop_region": f"({x1},{y1}) - ({x2},{y2})",
            "size": f"{cropped.size[0]}x{cropped.size[1]}"
        }

    except ImportError as e:
        return {"success": False, "error": f"需要安装依赖: {e}"}
    except Exception as e:
        return {"success": False, "error": f"添加模板失败: {e}"}
3193
+