mobile-mcp-ai 2.5.9__py3-none-any.whl → 2.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@
8
8
  - 核心功能精简
9
9
  - 保留 pytest 脚本生成
10
10
  - 支持操作历史记录
11
+ - Token 优化模式(省钱)
11
12
  """
12
13
 
13
14
  import asyncio
@@ -17,6 +18,19 @@ from pathlib import Path
17
18
  from typing import Dict, List, Optional
18
19
  from datetime import datetime
19
20
 
21
+ # Token 优化配置(只精简格式,不限制数量,确保准确度)
22
+ try:
23
+ from mobile_mcp.config import Config
24
+ TOKEN_OPTIMIZATION = Config.TOKEN_OPTIMIZATION_ENABLED
25
+ MAX_ELEMENTS = Config.MAX_ELEMENTS_RETURN
26
+ MAX_SOM_ELEMENTS = Config.MAX_SOM_ELEMENTS_RETURN
27
+ COMPACT_RESPONSE = Config.COMPACT_RESPONSE
28
+ except ImportError:
29
+ TOKEN_OPTIMIZATION = True
30
+ MAX_ELEMENTS = 0 # 0 = 不限制
31
+ MAX_SOM_ELEMENTS = 0 # 0 = 不限制
32
+ COMPACT_RESPONSE = True
33
+
20
34
 
21
35
  class BasicMobileToolsLite:
22
36
  """精简版移动端工具"""
@@ -31,6 +45,9 @@ class BasicMobileToolsLite:
31
45
 
32
46
  # 操作历史(用于生成 pytest 脚本)
33
47
  self.operation_history: List[Dict] = []
48
+
49
+ # 目标应用包名(用于监测应用跳转)
50
+ self.target_package: Optional[str] = None
34
51
 
35
52
  def _is_ios(self) -> bool:
36
53
  """判断当前是否为 iOS 平台"""
@@ -45,7 +62,7 @@ class BasicMobileToolsLite:
45
62
  return None
46
63
 
47
64
  def _record_operation(self, action: str, **kwargs):
48
- """记录操作到历史"""
65
+ """记录操作到历史(旧接口,保持兼容)"""
49
66
  record = {
50
67
  'action': action,
51
68
  'timestamp': datetime.now().isoformat(),
@@ -53,34 +70,232 @@ class BasicMobileToolsLite:
53
70
  }
54
71
  self.operation_history.append(record)
55
72
 
56
- def _get_full_hierarchy(self) -> str:
57
- """获取完整的 UI 层级 XML(包含 NAF 元素)
73
+ def _record_click(self, locator_type: str, locator_value: str,
74
+ x_percent: float = 0, y_percent: float = 0,
75
+ element_desc: str = '', locator_attr: str = ''):
76
+ """记录点击操作(标准格式)
77
+
78
+ Args:
79
+ locator_type: 定位类型 'text' | 'id' | 'percent' | 'coords'
80
+ locator_value: 定位值(文本内容、resource-id、或坐标描述)
81
+ x_percent: 百分比 X 坐标(兜底方案)
82
+ y_percent: 百分比 Y 坐标(兜底方案)
83
+ element_desc: 元素描述(用于脚本注释)
84
+ locator_attr: Android 选择器属性 'text'|'textContains'|'description'|'descriptionContains'
85
+ """
86
+ record = {
87
+ 'action': 'click',
88
+ 'timestamp': datetime.now().isoformat(),
89
+ 'locator_type': locator_type,
90
+ 'locator_value': locator_value,
91
+ 'locator_attr': locator_attr or locator_type, # 默认与 type 相同
92
+ 'x_percent': x_percent,
93
+ 'y_percent': y_percent,
94
+ 'element_desc': element_desc or locator_value,
95
+ }
96
+ self.operation_history.append(record)
97
+
98
+ def _record_long_press(self, locator_type: str, locator_value: str,
99
+ duration: float = 1.0,
100
+ x_percent: float = 0, y_percent: float = 0,
101
+ element_desc: str = '', locator_attr: str = ''):
102
+ """记录长按操作(标准格式)"""
103
+ record = {
104
+ 'action': 'long_press',
105
+ 'timestamp': datetime.now().isoformat(),
106
+ 'locator_type': locator_type,
107
+ 'locator_value': locator_value,
108
+ 'locator_attr': locator_attr or locator_type,
109
+ 'duration': duration,
110
+ 'x_percent': x_percent,
111
+ 'y_percent': y_percent,
112
+ 'element_desc': element_desc or locator_value,
113
+ }
114
+ self.operation_history.append(record)
115
+
116
+ def _record_input(self, text: str, locator_type: str = '', locator_value: str = '',
117
+ x_percent: float = 0, y_percent: float = 0):
118
+ """记录输入操作(标准格式)"""
119
+ record = {
120
+ 'action': 'input',
121
+ 'timestamp': datetime.now().isoformat(),
122
+ 'text': text,
123
+ 'locator_type': locator_type,
124
+ 'locator_value': locator_value,
125
+ 'x_percent': x_percent,
126
+ 'y_percent': y_percent,
127
+ }
128
+ self.operation_history.append(record)
129
+
130
+ def _record_swipe(self, direction: str):
131
+ """记录滑动操作"""
132
+ record = {
133
+ 'action': 'swipe',
134
+ 'timestamp': datetime.now().isoformat(),
135
+ 'direction': direction,
136
+ }
137
+ self.operation_history.append(record)
138
+
139
+ def _record_key(self, key: str):
140
+ """记录按键操作"""
141
+ record = {
142
+ 'action': 'press_key',
143
+ 'timestamp': datetime.now().isoformat(),
144
+ 'key': key,
145
+ }
146
+ self.operation_history.append(record)
147
+
148
+ def _get_current_package(self) -> Optional[str]:
149
+ """获取当前前台应用的包名/Bundle ID"""
150
+ try:
151
+ if self._is_ios():
152
+ ios_client = self._get_ios_client()
153
+ if ios_client and hasattr(ios_client, 'wda'):
154
+ app_info = ios_client.wda.session().app_current()
155
+ return app_info.get('bundleId')
156
+ else:
157
+ info = self.client.u2.app_current()
158
+ return info.get('package')
159
+ except Exception:
160
+ return None
161
+
162
+ def _check_app_switched(self) -> Dict:
163
+ """检查是否已跳出目标应用
58
164
 
59
- 优先使用 ADB 直接 dump,比 uiautomator2.dump_hierarchy 更完整
165
+ Returns:
166
+ {
167
+ 'switched': bool, # 是否跳转
168
+ 'current_package': str, # 当前应用包名
169
+ 'target_package': str, # 目标应用包名
170
+ 'message': str # 提示信息
171
+ }
60
172
  """
61
- import sys
173
+ if not self.target_package:
174
+ return {
175
+ 'switched': False,
176
+ 'current_package': None,
177
+ 'target_package': None,
178
+ 'message': '⚠️ 未设置目标应用,无法监测应用跳转'
179
+ }
62
180
 
63
- if self._is_ios():
64
- # iOS 使用 page_source
65
- ios_client = self._get_ios_client()
66
- if ios_client and hasattr(ios_client, 'wda'):
67
- return ios_client.wda.source()
68
- return ""
181
+ current = self._get_current_package()
182
+ if not current:
183
+ return {
184
+ 'switched': False,
185
+ 'current_package': None,
186
+ 'target_package': self.target_package,
187
+ 'message': '⚠️ 无法获取当前应用包名'
188
+ }
189
+
190
+ if current != self.target_package:
191
+ return {
192
+ 'switched': True,
193
+ 'current_package': current,
194
+ 'target_package': self.target_package,
195
+ 'message': f'⚠️ 应用已跳转!当前应用: {current},目标应用: {self.target_package}'
196
+ }
197
+
198
+ return {
199
+ 'switched': False,
200
+ 'current_package': current,
201
+ 'target_package': self.target_package,
202
+ 'message': f'✅ 仍在目标应用: {current}'
203
+ }
204
+
205
+ def _return_to_target_app(self) -> Dict:
206
+ """返回到目标应用
207
+
208
+ 策略:
209
+ 1. 先按返回键(可能关闭弹窗或返回上一页)
210
+ 2. 如果还在其他应用,启动目标应用
211
+ 3. 验证是否成功返回
212
+
213
+ Returns:
214
+ {
215
+ 'success': bool,
216
+ 'message': str,
217
+ 'method': str # 使用的返回方法
218
+ }
219
+ """
220
+ if not self.target_package:
221
+ return {
222
+ 'success': False,
223
+ 'message': '❌ 未设置目标应用,无法返回',
224
+ 'method': None
225
+ }
69
226
 
70
- # Android: 优先使用 ADB 直接 dump
71
227
  try:
72
- # 方法1: ADB dump(获取最完整的 UI 树,包括 NAF 元素)
73
- self.client.u2.shell('uiautomator dump /sdcard/ui_dump.xml')
74
- result = self.client.u2.shell('cat /sdcard/ui_dump.xml')
75
- if result and isinstance(result, str) and result.strip().startswith('<?xml'):
76
- xml_string = result.strip()
77
- self.client.u2.shell('rm /sdcard/ui_dump.xml')
78
- return xml_string
228
+ # 先检查当前应用
229
+ current = self._get_current_package()
230
+ if not current:
231
+ return {
232
+ 'success': False,
233
+ 'message': '❌ 无法获取当前应用包名',
234
+ 'method': None
235
+ }
236
+
237
+ # 如果已经在目标应用,不需要返回
238
+ if current == self.target_package:
239
+ return {
240
+ 'success': True,
241
+ 'message': f'✅ 已在目标应用: {self.target_package}',
242
+ 'method': 'already_in_target'
243
+ }
244
+
245
+ # 策略1: 先按返回键(可能关闭弹窗或返回)
246
+ if self._is_ios():
247
+ ios_client = self._get_ios_client()
248
+ if ios_client and hasattr(ios_client, 'wda'):
249
+ # iOS 返回键
250
+ ios_client.wda.press('home') # iOS 先按 home
251
+ time.sleep(0.5)
252
+ # 然后启动目标应用
253
+ ios_client.wda.app_activate(self.target_package)
254
+ else:
255
+ return {
256
+ 'success': False,
257
+ 'message': '❌ iOS 客户端未初始化',
258
+ 'method': None
259
+ }
260
+ else:
261
+ # Android: 先按返回键
262
+ self.client.u2.press('back')
263
+ time.sleep(0.5)
264
+
265
+ # 检查是否已返回
266
+ current = self._get_current_package()
267
+ if current == self.target_package:
268
+ return {
269
+ 'success': True,
270
+ 'message': f'✅ 已返回目标应用: {self.target_package}(通过返回键)',
271
+ 'method': 'back_key'
272
+ }
273
+
274
+ # 如果还在其他应用,启动目标应用
275
+ self.client.u2.app_start(self.target_package)
276
+ time.sleep(1)
277
+
278
+ # 验证是否成功返回
279
+ current = self._get_current_package()
280
+ if current == self.target_package:
281
+ return {
282
+ 'success': True,
283
+ 'message': f'✅ 已返回目标应用: {self.target_package}',
284
+ 'method': 'app_start'
285
+ }
286
+ else:
287
+ return {
288
+ 'success': False,
289
+ 'message': f'❌ 返回失败:当前应用仍为 {current},期望 {self.target_package}',
290
+ 'method': 'app_start'
291
+ }
79
292
  except Exception as e:
80
- print(f" ⚠️ ADB dump 失败: {e}", file=sys.stderr)
81
-
82
- # 方法2: 回退到 uiautomator2
83
- return self.client.u2.dump_hierarchy(compressed=False)
293
+ return {
294
+ 'success': False,
295
+ 'message': f'❌ 返回目标应用失败: {e}',
296
+ 'method': None
297
+ }
298
+
84
299
 
85
300
  # ==================== 截图 ====================
86
301
 
@@ -134,7 +349,7 @@ class BasicMobileToolsLite:
134
349
  size = ios_client.wda.window_size()
135
350
  screen_width, screen_height = size[0], size[1]
136
351
  else:
137
- return {"success": False, "message": "iOS 客户端未初始化"}
352
+ return {"success": False, "msg": "iOS未初始化"}
138
353
  else:
139
354
  self.client.u2.screenshot(str(temp_path))
140
355
  info = self.client.u2.info
@@ -185,22 +400,14 @@ class BasicMobileToolsLite:
185
400
 
186
401
  cropped_size = final_path.stat().st_size
187
402
 
403
+ # 返回结果
188
404
  return {
189
405
  "success": True,
190
406
  "screenshot_path": str(final_path),
191
- "screen_width": screen_width,
192
- "screen_height": screen_height,
193
407
  "image_width": img.width,
194
408
  "image_height": img.height,
195
409
  "crop_offset_x": crop_offset_x,
196
- "crop_offset_y": crop_offset_y,
197
- "file_size": f"{cropped_size/1024:.1f}KB",
198
- "message": f"🔍 局部截图已保存: {final_path}\n"
199
- f"📐 裁剪区域: ({crop_offset_x}, {crop_offset_y}) 起,{img.width}x{img.height} 像素\n"
200
- f"📦 文件大小: {cropped_size/1024:.0f}KB\n"
201
- f"🎯 【坐标换算】AI 返回坐标 (x, y) 后:\n"
202
- f" 实际屏幕坐标 = ({crop_offset_x} + x, {crop_offset_y} + y)\n"
203
- f" 或直接调用 mobile_click_at_coords(x, y, crop_offset_x={crop_offset_x}, crop_offset_y={crop_offset_y})"
410
+ "crop_offset_y": crop_offset_y
204
411
  }
205
412
 
206
413
  # ========== 情况2:全屏压缩截图 ==========
@@ -253,24 +460,14 @@ class BasicMobileToolsLite:
253
460
  compressed_size = final_path.stat().st_size
254
461
  saved_percent = (1 - compressed_size / original_size) * 100
255
462
 
463
+ # 返回结果
256
464
  return {
257
465
  "success": True,
258
466
  "screenshot_path": str(final_path),
259
- "screen_width": screen_width,
260
- "screen_height": screen_height,
261
- "original_img_width": original_img_width, # 截图原始宽度
262
- "original_img_height": original_img_height, # 截图原始高度
263
- "image_width": image_width, # 压缩后宽度(AI 看到的)
264
- "image_height": image_height, # 压缩后高度(AI 看到的)
265
- "original_size": f"{original_size/1024:.1f}KB",
266
- "compressed_size": f"{compressed_size/1024:.1f}KB",
267
- "saved_percent": f"{saved_percent:.0f}%",
268
- "message": f"📸 截图已保存: {final_path}\n"
269
- f"📐 原始尺寸: {original_img_width}x{original_img_height} → 压缩后: {image_width}x{image_height}\n"
270
- f"📦 已压缩: {original_size/1024:.0f}KB → {compressed_size/1024:.0f}KB (省 {saved_percent:.0f}%)\n"
271
- f"⚠️ 【坐标转换】AI 返回坐标后,请传入:\n"
272
- f" image_width={image_width}, image_height={image_height},\n"
273
- f" original_img_width={original_img_width}, original_img_height={original_img_height}"
467
+ "image_width": image_width,
468
+ "image_height": image_height,
469
+ "original_img_width": original_img_width,
470
+ "original_img_height": original_img_height
274
471
  }
275
472
 
276
473
  # ========== 情况3:全屏不压缩截图 ==========
@@ -284,21 +481,12 @@ class BasicMobileToolsLite:
284
481
  final_path = self.screenshot_dir / filename
285
482
  temp_path.rename(final_path)
286
483
 
287
- # 不压缩时,用截图实际尺寸(可能和 screen_width 不同)
484
+ # 返回结果(不压缩时尺寸相同)
288
485
  return {
289
486
  "success": True,
290
487
  "screenshot_path": str(final_path),
291
- "screen_width": screen_width,
292
- "screen_height": screen_height,
293
- "original_img_width": img.width, # 截图实际尺寸
294
- "original_img_height": img.height,
295
- "image_width": img.width, # 未压缩,和原图一样
296
- "image_height": img.height,
297
- "file_size": f"{original_size/1024:.1f}KB",
298
- "message": f"📸 截图已保存: {final_path}\n"
299
- f"📐 截图尺寸: {img.width}x{img.height}\n"
300
- f"📦 文件大小: {original_size/1024:.0f}KB(未压缩)\n"
301
- f"💡 未压缩,坐标可直接使用"
488
+ "image_width": img.width,
489
+ "image_height": img.height
302
490
  }
303
491
  except ImportError:
304
492
  # 如果没有 PIL,回退到原始方式(不压缩)
@@ -306,7 +494,7 @@ class BasicMobileToolsLite:
306
494
  except Exception as e:
307
495
  return {"success": False, "message": f"❌ 截图失败: {e}"}
308
496
 
309
- def take_screenshot_with_grid(self, grid_size: int = 100, show_popup_hints: bool = True) -> Dict:
497
+ def take_screenshot_with_grid(self, grid_size: int = 100, show_popup_hints: bool = False) -> Dict:
310
498
  """截图并添加网格坐标标注(用于精确定位元素)
311
499
 
312
500
  在截图上绘制网格线和坐标刻度,帮助快速定位元素位置。
@@ -338,7 +526,7 @@ class BasicMobileToolsLite:
338
526
  size = ios_client.wda.window_size()
339
527
  screen_width, screen_height = size[0], size[1]
340
528
  else:
341
- return {"success": False, "message": "iOS 客户端未初始化"}
529
+ return {"success": False, "msg": "iOS未初始化"}
342
530
  else:
343
531
  self.client.u2.screenshot(str(temp_path))
344
532
  info = self.client.u2.info
@@ -374,45 +562,22 @@ class BasicMobileToolsLite:
374
562
  # 左侧标注 Y 坐标
375
563
  draw.text((2, y + 2), str(y), fill=text_color, font=font_small)
376
564
 
377
- # 第3步:检测弹窗并标注
565
+ # 第3步:检测弹窗并标注(使用严格的置信度检测,避免误识别)
378
566
  popup_info = None
379
567
  close_positions = []
380
568
 
381
569
  if show_popup_hints and not self._is_ios():
382
570
  try:
383
571
  import xml.etree.ElementTree as ET
384
- xml_string = self._get_full_hierarchy()
572
+ xml_string = self.client.u2.dump_hierarchy(compressed=False)
385
573
  root = ET.fromstring(xml_string)
386
574
 
387
- # 检测弹窗区域
388
- popup_bounds = None
389
- for elem in root.iter():
390
- bounds_str = elem.attrib.get('bounds', '')
391
- class_name = elem.attrib.get('class', '')
392
-
393
- if not bounds_str:
394
- continue
395
-
396
- match = re.match(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds_str)
397
- if not match:
398
- continue
399
-
400
- x1, y1, x2, y2 = map(int, match.groups())
401
- width = x2 - x1
402
- height = y2 - y1
403
- area = width * height
404
- screen_area = screen_width * screen_height
405
-
406
- is_container = any(kw in class_name for kw in ['Layout', 'View', 'Dialog', 'Card'])
407
- area_ratio = area / screen_area if screen_area > 0 else 0
408
- is_not_fullscreen = (width < screen_width * 0.98 or height < screen_height * 0.98)
409
- is_reasonable_size = 0.08 < area_ratio < 0.85
410
-
411
- if is_container and is_not_fullscreen and is_reasonable_size and y1 > 50:
412
- if popup_bounds is None or area > (popup_bounds[2] - popup_bounds[0]) * (popup_bounds[3] - popup_bounds[1]):
413
- popup_bounds = (x1, y1, x2, y2)
575
+ # 使用严格的弹窗检测(置信度 >= 0.6 才认为是弹窗)
576
+ popup_bounds, popup_confidence = self._detect_popup_with_confidence(
577
+ root, screen_width, screen_height
578
+ )
414
579
 
415
- if popup_bounds:
580
+ if popup_bounds and popup_confidence >= 0.6:
416
581
  px1, py1, px2, py2 = popup_bounds
417
582
  popup_width = px2 - px1
418
583
  popup_height = py2 - py1
@@ -475,26 +640,16 @@ class BasicMobileToolsLite:
475
640
  result = {
476
641
  "success": True,
477
642
  "screenshot_path": str(final_path),
478
- "screen_width": screen_width,
479
- "screen_height": screen_height,
480
643
  "image_width": img_width,
481
644
  "image_height": img_height,
482
- "grid_size": grid_size,
483
- "message": f"📸 网格截图已保存: {final_path}\n"
484
- f"📐 尺寸: {img_width}x{img_height}\n"
485
- f"📏 网格间距: {grid_size}px"
645
+ "grid_size": grid_size
486
646
  }
487
647
 
488
648
  if popup_info:
489
- result["popup_detected"] = True
490
- result["popup_bounds"] = popup_info["bounds"]
491
- result["close_button_hints"] = close_positions
492
- result["message"] += f"\n🎯 检测到弹窗: {popup_info['bounds']}"
493
- result["message"] += f"\n💡 可能的关闭按钮位置(绿色圆圈标注):"
494
- for pos in close_positions:
495
- result["message"] += f"\n {pos['priority']}. {pos['name']}: ({pos['x']}, {pos['y']})"
496
- else:
497
- result["popup_detected"] = False
649
+ result["popup"] = popup_info["bounds"]
650
+ # 只返回前3个最可能的关闭按钮位置
651
+ if close_positions:
652
+ result["close_hints"] = [(p['x'], p['y']) for p in close_positions[:3]]
498
653
 
499
654
  return result
500
655
 
@@ -531,7 +686,7 @@ class BasicMobileToolsLite:
531
686
  size = ios_client.wda.window_size()
532
687
  screen_width, screen_height = size[0], size[1]
533
688
  else:
534
- return {"success": False, "message": "iOS 客户端未初始化"}
689
+ return {"success": False, "msg": "iOS未初始化"}
535
690
  else:
536
691
  self.client.u2.screenshot(str(temp_path))
537
692
  info = self.client.u2.info
@@ -558,7 +713,7 @@ class BasicMobileToolsLite:
558
713
  else:
559
714
  try:
560
715
  import xml.etree.ElementTree as ET
561
- xml_string = self._get_full_hierarchy()
716
+ xml_string = self.client.u2.dump_hierarchy(compressed=False)
562
717
  root = ET.fromstring(xml_string)
563
718
 
564
719
  for elem in root.iter():
@@ -640,44 +795,24 @@ class BasicMobileToolsLite:
640
795
  'index': i + 1,
641
796
  'center': (cx, cy),
642
797
  'bounds': f"[{x1},{y1}][{x2},{y2}]",
643
- 'desc': elem['desc']
798
+ 'desc': elem['desc'],
799
+ 'text': elem.get('text', ''),
800
+ 'resource_id': elem.get('resource_id', '')
644
801
  })
645
802
 
646
- # 第3.5步:检测弹窗区域(用于标注)
803
+ # 第3.5步:检测弹窗区域(使用严格的置信度检测,避免误识别普通页面)
647
804
  popup_bounds = None
805
+ popup_confidence = 0
648
806
 
649
807
  if not self._is_ios():
650
808
  try:
651
- # 检测弹窗区域
652
- for elem in root.iter():
653
- bounds_str = elem.attrib.get('bounds', '')
654
- class_name = elem.attrib.get('class', '')
655
-
656
- if not bounds_str:
657
- continue
658
-
659
- match = re.match(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds_str)
660
- if not match:
661
- continue
662
-
663
- px1, py1, px2, py2 = map(int, match.groups())
664
- p_width = px2 - px1
665
- p_height = py2 - py1
666
- p_area = p_width * p_height
667
- screen_area = screen_width * screen_height
668
-
669
- is_container = any(kw in class_name for kw in ['Layout', 'View', 'Dialog', 'Card', 'Frame'])
670
- area_ratio = p_area / screen_area if screen_area > 0 else 0
671
- is_not_fullscreen = (p_width < screen_width * 0.99 or p_height < screen_height * 0.95)
672
- # 放宽面积范围:5% - 95%
673
- is_reasonable_size = 0.05 < area_ratio < 0.95
674
-
675
- if is_container and is_not_fullscreen and is_reasonable_size and py1 > 30:
676
- if popup_bounds is None or p_area > (popup_bounds[2] - popup_bounds[0]) * (popup_bounds[3] - popup_bounds[1]):
677
- popup_bounds = (px1, py1, px2, py2)
809
+ # 使用严格的弹窗检测(置信度 >= 0.6 才认为是弹窗)
810
+ popup_bounds, popup_confidence = self._detect_popup_with_confidence(
811
+ root, screen_width, screen_height
812
+ )
678
813
 
679
814
  # 如果检测到弹窗,标注弹窗边界(不再猜测X按钮位置)
680
- if popup_bounds:
815
+ if popup_bounds and popup_confidence >= 0.6:
681
816
  px1, py1, px2, py2 = popup_bounds
682
817
 
683
818
  # 只画弹窗边框(蓝色),不再猜测X按钮位置
@@ -711,38 +846,15 @@ class BasicMobileToolsLite:
711
846
  img.save(str(final_path), "JPEG", quality=85)
712
847
  temp_path.unlink()
713
848
 
714
- # 构建元素列表文字
715
- elements_text = "\n".join([
716
- f" [{e['index']}] {e['desc']} → ({e['center'][0]}, {e['center'][1]})"
717
- for e in som_elements[:15] # 只显示前15个
718
- ])
719
- if len(som_elements) > 15:
720
- elements_text += f"\n ... 还有 {len(som_elements) - 15} 个元素"
721
-
722
- # 构建弹窗提示文字
723
- hints_text = ""
724
- if popup_bounds:
725
- hints_text = f"\n🎯 检测到弹窗区域(蓝色边框)\n"
726
- hints_text += f" 如需关闭弹窗,请观察图片中的 X 按钮位置\n"
727
- hints_text += f" 然后使用 mobile_click_by_percent(x%, y%) 点击"
728
-
849
+ # 返回结果(Token 优化:不返回 elements 列表,已存储在 self._som_elements)
729
850
  return {
730
851
  "success": True,
731
852
  "screenshot_path": str(final_path),
732
853
  "screen_width": screen_width,
733
854
  "screen_height": screen_height,
734
- "image_width": img_width,
735
- "image_height": img_height,
736
855
  "element_count": len(som_elements),
737
- "elements": som_elements,
738
856
  "popup_detected": popup_bounds is not None,
739
- "popup_bounds": f"[{popup_bounds[0]},{popup_bounds[1]}][{popup_bounds[2]},{popup_bounds[3]}]" if popup_bounds else None,
740
- "message": f"📸 SoM 截图已保存: {final_path}\n"
741
- f"🏷️ 已标注 {len(som_elements)} 个可点击元素\n"
742
- f"📋 元素列表:\n{elements_text}{hints_text}\n\n"
743
- f"💡 使用方法:\n"
744
- f" - 点击标注元素:mobile_click_by_som(编号)\n"
745
- f" - 点击任意位置:mobile_click_by_percent(x%, y%)"
857
+ "hint": "查看截图上的编号,用 click_by_som(编号) 点击"
746
858
  }
747
859
 
748
860
  except ImportError:
@@ -788,14 +900,41 @@ class BasicMobileToolsLite:
788
900
  ios_client = self._get_ios_client()
789
901
  if ios_client and hasattr(ios_client, 'wda'):
790
902
  ios_client.wda.click(cx, cy)
903
+ size = ios_client.wda.window_size()
904
+ screen_width, screen_height = size[0], size[1]
791
905
  else:
792
906
  self.client.u2.click(cx, cy)
793
-
907
+ info = self.client.u2.info
908
+ screen_width = info.get('displayWidth', 0)
909
+ screen_height = info.get('displayHeight', 0)
910
+
794
911
  time.sleep(0.3)
795
912
 
913
+ # 计算百分比坐标用于跨设备兼容
914
+ x_percent = round(cx / screen_width * 100, 1) if screen_width > 0 else 0
915
+ y_percent = round(cy / screen_height * 100, 1) if screen_height > 0 else 0
916
+
917
+ # 使用标准记录格式
918
+ # 优先使用元素的文本/描述信息,这样生成脚本时可以用文本定位
919
+ elem_text = target.get('text', '')
920
+ elem_id = target.get('resource_id', '')
921
+ elem_desc = target.get('desc', '')
922
+
923
+ if elem_text and not elem_text.startswith('['): # 排除类似 "[可点击]" 的描述
924
+ # 有文本,使用文本定位
925
+ self._record_click('text', elem_text, x_percent, y_percent,
926
+ element_desc=f"[{index}]{elem_desc}", locator_attr='text')
927
+ elif elem_id:
928
+ # 有 resource-id,使用 ID 定位
929
+ self._record_click('id', elem_id, x_percent, y_percent,
930
+ element_desc=f"[{index}]{elem_desc}")
931
+ else:
932
+ # 都没有,使用百分比定位
933
+ self._record_click('percent', f"{x_percent}%,{y_percent}%", x_percent, y_percent,
934
+ element_desc=f"[{index}]{elem_desc}")
935
+
796
936
  return {
797
937
  "success": True,
798
- "message": f"✅ 已点击 [{index}] {target['desc']} → ({cx}, {cy})\n💡 建议:再次截图确认操作是否成功",
799
938
  "clicked": {
800
939
  "index": index,
801
940
  "desc": target['desc'],
@@ -829,7 +968,7 @@ class BasicMobileToolsLite:
829
968
  size = ios_client.wda.window_size()
830
969
  width, height = size[0], size[1]
831
970
  else:
832
- return {"success": False, "message": "iOS 客户端未初始化"}
971
+ return {"success": False, "msg": "iOS未初始化"}
833
972
  else:
834
973
  self.client.u2.screenshot(str(screenshot_path))
835
974
  info = self.client.u2.info
@@ -907,7 +1046,7 @@ class BasicMobileToolsLite:
907
1046
  size = ios_client.wda.window_size()
908
1047
  screen_width, screen_height = size[0], size[1]
909
1048
  else:
910
- return {"success": False, "message": "iOS 客户端未初始化"}
1049
+ return {"success": False, "msg": "iOS未初始化"}
911
1050
  else:
912
1051
  info = self.client.u2.info
913
1052
  screen_width = info.get('displayWidth', 0)
@@ -951,37 +1090,45 @@ class BasicMobileToolsLite:
951
1090
  x_percent = round(x / screen_width * 100, 1) if screen_width > 0 else 0
952
1091
  y_percent = round(y / screen_height * 100, 1) if screen_height > 0 else 0
953
1092
 
954
- # 记录操作(包含屏幕尺寸和百分比,便于脚本生成时转换)
955
- self._record_operation(
956
- 'click',
957
- x=x,
958
- y=y,
959
- x_percent=x_percent,
960
- y_percent=y_percent,
961
- screen_width=screen_width,
962
- screen_height=screen_height,
963
- ref=f"coords_{x}_{y}"
964
- )
1093
+ # 使用标准记录格式:坐标点击用百分比作为定位方式(跨分辨率兼容)
1094
+ self._record_click('percent', f"{x_percent}%,{y_percent}%", x_percent, y_percent,
1095
+ element_desc=f"坐标({x},{y})")
1096
+
1097
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1098
+ app_check = self._check_app_switched()
1099
+ return_result = None
965
1100
 
1101
+ if app_check['switched']:
1102
+ # 应用已跳转,尝试返回目标应用
1103
+ return_result = self._return_to_target_app()
1104
+
1105
+ # 构建返回消息
966
1106
  if converted:
967
1107
  if conversion_type == "crop_offset":
968
- return {
969
- "success": True,
970
- "message": f"✅ 点击成功: ({x}, {y})\n"
971
- f" 🔍 局部截图坐标转换: ({original_x},{original_y}) + 偏移({crop_offset_x},{crop_offset_y}) → ({x},{y})"
972
- }
1108
+ msg = f"✅ 点击成功: ({x}, {y})\n" \
1109
+ f" 🔍 局部截图坐标转换: ({original_x},{original_y}) + 偏移({crop_offset_x},{crop_offset_y}) → ({x},{y})"
973
1110
  else:
974
- return {
975
- "success": True,
976
- "message": f" 点击成功: ({x}, {y})\n"
977
- f" 📐 坐标已转换: ({original_x},{original_y}) → ({x},{y})\n"
978
- f" 🖼️ 图片尺寸: {image_width}x{image_height} → 屏幕: {screen_width}x{screen_height}"
979
- }
1111
+ msg = f"✅ 点击成功: ({x}, {y})\n" \
1112
+ f" 📐 坐标已转换: ({original_x},{original_y}) → ({x},{y})\n" \
1113
+ f" 🖼️ 图片尺寸: {image_width}x{image_height} → 屏幕: {screen_width}x{screen_height}"
980
1114
  else:
981
- return {
982
- "success": True,
983
- "message": f"✅ 点击成功: ({x}, {y}) [相对位置: {x_percent}%, {y_percent}%]"
984
- }
1115
+ msg = f"✅ 点击成功: ({x}, {y}) [相对位置: {x_percent}%, {y_percent}%]"
1116
+
1117
+ # 如果检测到应用跳转,添加警告和返回结果
1118
+ if app_check['switched']:
1119
+ msg += f"\n{app_check['message']}"
1120
+ if return_result:
1121
+ if return_result['success']:
1122
+ msg += f"\n{return_result['message']}"
1123
+ else:
1124
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1125
+
1126
+ return {
1127
+ "success": True,
1128
+ "message": msg,
1129
+ "app_check": app_check,
1130
+ "return_to_app": return_result
1131
+ }
985
1132
  except Exception as e:
986
1133
  return {"success": False, "message": f"❌ 点击失败: {e}"}
987
1134
 
@@ -1014,14 +1161,14 @@ class BasicMobileToolsLite:
1014
1161
  size = ios_client.wda.window_size()
1015
1162
  width, height = size[0], size[1]
1016
1163
  else:
1017
- return {"success": False, "message": "iOS 客户端未初始化"}
1164
+ return {"success": False, "msg": "iOS未初始化"}
1018
1165
  else:
1019
1166
  info = self.client.u2.info
1020
1167
  width = info.get('displayWidth', 0)
1021
1168
  height = info.get('displayHeight', 0)
1022
1169
 
1023
1170
  if width == 0 or height == 0:
1024
- return {"success": False, "message": "无法获取屏幕尺寸"}
1171
+ return {"success": False, "msg": "无法获取屏幕尺寸"}
1025
1172
 
1026
1173
  # 第2步:百分比转像素坐标
1027
1174
  # 公式:像素 = 屏幕尺寸 × (百分比 / 100)
@@ -1036,30 +1183,29 @@ class BasicMobileToolsLite:
1036
1183
 
1037
1184
  time.sleep(0.3)
1038
1185
 
1039
- # 第4步:记录操作(同时记录百分比和像素)
1040
- self._record_operation(
1041
- 'click',
1042
- x=x,
1043
- y=y,
1044
- x_percent=x_percent,
1045
- y_percent=y_percent,
1046
- screen_width=width,
1047
- screen_height=height,
1048
- ref=f"percent_{x_percent}_{y_percent}"
1049
- )
1186
+ # 第4步:使用标准记录格式
1187
+ self._record_click('percent', f"{x_percent}%,{y_percent}%", x_percent, y_percent,
1188
+ element_desc=f"百分比({x_percent}%,{y_percent}%)")
1050
1189
 
1051
1190
  return {
1052
1191
  "success": True,
1053
- "message": f"✅ 百分比点击成功: ({x_percent}%, {y_percent}%) → 像素({x}, {y})",
1054
- "screen_size": {"width": width, "height": height},
1055
- "percent": {"x": x_percent, "y": y_percent},
1056
1192
  "pixel": {"x": x, "y": y}
1057
1193
  }
1058
1194
  except Exception as e:
1059
1195
  return {"success": False, "message": f"❌ 百分比点击失败: {e}"}
1060
1196
 
1061
- def click_by_text(self, text: str, timeout: float = 3.0) -> Dict:
1062
- """通过文本点击 - 先查 XML 树,再精准匹配"""
1197
+ def click_by_text(self, text: str, timeout: float = 3.0, position: Optional[str] = None,
1198
+ verify: Optional[str] = None) -> Dict:
1199
+ """通过文本点击 - 先查 XML 树,再精准匹配
1200
+
1201
+ Args:
1202
+ text: 元素的文本内容
1203
+ timeout: 超时时间
1204
+ position: 位置信息,当有多个相同文案时使用。支持:
1205
+ - 垂直方向: "top"/"upper"/"上", "bottom"/"lower"/"下", "middle"/"center"/"中"
1206
+ - 水平方向: "left"/"左", "right"/"右", "center"/"中"
1207
+ verify: 可选,点击后验证的文本。如果指定,会检查该文本是否出现在页面上
1208
+ """
1063
1209
  try:
1064
1210
  if self._is_ios():
1065
1211
  ios_client = self._get_ios_client()
@@ -1070,19 +1216,53 @@ class BasicMobileToolsLite:
1070
1216
  if elem.exists:
1071
1217
  elem.click()
1072
1218
  time.sleep(0.3)
1073
- self._record_operation('click', element=text, ref=text)
1074
- return {"success": True, "message": f"✅ 点击成功: '{text}'"}
1075
- return {"success": False, "message": f"❌ 文本不存在: {text}"}
1219
+ self._record_click('text', text, element_desc=text, locator_attr='text')
1220
+ # 验证逻辑
1221
+ if verify:
1222
+ return self._verify_after_click(verify, ios=True)
1223
+ # 返回页面文本摘要,方便确认页面变化
1224
+ page_texts = self._get_page_texts(10)
1225
+ return {"success": True, "page_texts": page_texts}
1226
+ # 控件树找不到,提示用视觉识别
1227
+ return {"success": False, "fallback": "vision", "msg": f"未找到'{text}',用截图点击"}
1228
+ else:
1229
+ return {"success": False, "msg": "iOS未初始化"}
1076
1230
  else:
1231
+ # 获取屏幕尺寸用于计算百分比
1232
+ screen_width, screen_height = self.client.u2.window_size()
1233
+
1077
1234
  # 🔍 先查 XML 树,找到元素及其属性
1078
- found_elem = self._find_element_in_tree(text)
1235
+ found_elem = self._find_element_in_tree(text, position=position)
1079
1236
 
1080
1237
  if found_elem:
1081
1238
  attr_type = found_elem['attr_type']
1082
1239
  attr_value = found_elem['attr_value']
1083
1240
  bounds = found_elem.get('bounds')
1084
1241
 
1085
- # 根据找到的属性类型,使用对应的选择器
1242
+ # 计算百分比坐标作为兜底
1243
+ x_pct, y_pct = 0, 0
1244
+ if bounds:
1245
+ cx = (bounds[0] + bounds[2]) // 2
1246
+ cy = (bounds[1] + bounds[3]) // 2
1247
+ x_pct = round(cx / screen_width * 100, 1)
1248
+ y_pct = round(cy / screen_height * 100, 1)
1249
+
1250
+ # 如果有位置参数,直接使用坐标点击
1251
+ if position and bounds:
1252
+ x = (bounds[0] + bounds[2]) // 2
1253
+ y = (bounds[1] + bounds[3]) // 2
1254
+ self.client.u2.click(x, y)
1255
+ time.sleep(0.3)
1256
+ self._record_click('text', attr_value, x_pct, y_pct,
1257
+ element_desc=f"{text}({position})", locator_attr=attr_type)
1258
+ # 验证逻辑
1259
+ if verify:
1260
+ return self._verify_after_click(verify)
1261
+ # 返回页面文本摘要
1262
+ page_texts = self._get_page_texts(10)
1263
+ return {"success": True, "page_texts": page_texts}
1264
+
1265
+ # 没有位置参数时,使用选择器定位
1086
1266
  if attr_type == 'text':
1087
1267
  elem = self.client.u2(text=attr_value)
1088
1268
  elif attr_type == 'textContains':
@@ -1097,33 +1277,98 @@ class BasicMobileToolsLite:
1097
1277
  if elem and elem.exists(timeout=1):
1098
1278
  elem.click()
1099
1279
  time.sleep(0.3)
1100
- self._record_operation('click', element=text, ref=f"{attr_type}:{attr_value}")
1101
- return {"success": True, "message": f"✅ 点击成功({attr_type}): '{text}'"}
1280
+ self._record_click('text', attr_value, x_pct, y_pct,
1281
+ element_desc=text, locator_attr=attr_type)
1282
+ # 验证逻辑
1283
+ if verify:
1284
+ return self._verify_after_click(verify)
1285
+ # 返回页面文本摘要
1286
+ page_texts = self._get_page_texts(10)
1287
+ return {"success": True, "page_texts": page_texts}
1102
1288
 
1103
- # 如果选择器失败,用坐标兜底
1289
+ # 选择器失败,用坐标兜底
1104
1290
  if bounds:
1105
1291
  x = (bounds[0] + bounds[2]) // 2
1106
1292
  y = (bounds[1] + bounds[3]) // 2
1107
1293
  self.client.u2.click(x, y)
1108
1294
  time.sleep(0.3)
1109
- self._record_operation('click', element=text, x=x, y=y, ref=f"coords:{x},{y}")
1110
- return {"success": True, "message": f"✅ 点击成功(坐标兜底): '{text}' @ ({x},{y})"}
1295
+ self._record_click('percent', f"{x_pct}%,{y_pct}%", x_pct, y_pct,
1296
+ element_desc=text)
1297
+ # 验证逻辑
1298
+ if verify:
1299
+ return self._verify_after_click(verify)
1300
+ # 返回页面文本摘要
1301
+ page_texts = self._get_page_texts(10)
1302
+ return {"success": True, "page_texts": page_texts}
1111
1303
 
1112
- return {"success": False, "message": f"❌ 文本不存在: {text}"}
1304
+ # 控件树找不到,提示用视觉识别
1305
+ return {"success": False, "fallback": "vision", "msg": f"未找到'{text}',用截图点击"}
1113
1306
  except Exception as e:
1114
- return {"success": False, "message": f"❌ 点击失败: {e}"}
1307
+ return {"success": False, "msg": str(e)}
1308
+
1309
+ def _verify_after_click(self, verify_text: str, ios: bool = False, timeout: float = 2.0) -> Dict:
1310
+ """点击后验证期望文本是否出现
1311
+
1312
+ Args:
1313
+ verify_text: 期望出现的文本
1314
+ ios: 是否是 iOS 设备
1315
+ timeout: 验证超时时间
1316
+
1317
+ Returns:
1318
+ {"success": True, "verified": True/False, "hint": "..."}
1319
+ """
1320
+ time.sleep(0.5) # 等待页面更新
1321
+
1322
+ try:
1323
+ if ios:
1324
+ ios_client = self._get_ios_client()
1325
+ if ios_client and hasattr(ios_client, 'wda'):
1326
+ exists = ios_client.wda(name=verify_text).exists or \
1327
+ ios_client.wda(label=verify_text).exists
1328
+ else:
1329
+ exists = False
1330
+ else:
1331
+ # Android: 检查文本或包含文本
1332
+ exists = self.client.u2(text=verify_text).exists(timeout=timeout) or \
1333
+ self.client.u2(textContains=verify_text).exists(timeout=0.5) or \
1334
+ self.client.u2(description=verify_text).exists(timeout=0.5)
1335
+
1336
+ if exists:
1337
+ return {"success": True, "verified": True}
1338
+ else:
1339
+ # 验证失败,提示可以截图确认
1340
+ return {
1341
+ "success": True, # 点击本身成功
1342
+ "verified": False,
1343
+ "expect": verify_text,
1344
+ "hint": "验证失败,可截图确认"
1345
+ }
1346
+ except Exception as e:
1347
+ return {"success": True, "verified": False, "hint": f"验证异常: {e}"}
1115
1348
 
1116
- def _find_element_in_tree(self, text: str) -> Optional[Dict]:
1117
- """在 XML 树中查找包含指定文本的元素(使用完整 UI 层级)"""
1349
+ def _find_element_in_tree(self, text: str, position: Optional[str] = None) -> Optional[Dict]:
1350
+ """在 XML 树中查找包含指定文本的元素,优先返回可点击的元素
1351
+
1352
+ Args:
1353
+ text: 要查找的文本
1354
+ position: 位置信息,用于在有多个相同文案时筛选
1355
+ """
1118
1356
  try:
1119
- xml = self._get_full_hierarchy()
1357
+ xml = self.client.u2.dump_hierarchy(compressed=False)
1120
1358
  import xml.etree.ElementTree as ET
1121
1359
  root = ET.fromstring(xml)
1122
1360
 
1361
+ # 获取屏幕尺寸
1362
+ screen_width, screen_height = self.client.u2.window_size()
1363
+
1364
+ # 存储所有匹配的元素(包括不可点击的)
1365
+ matched_elements = []
1366
+
1123
1367
  for elem in root.iter():
1124
1368
  elem_text = elem.attrib.get('text', '')
1125
1369
  elem_desc = elem.attrib.get('content-desc', '')
1126
1370
  bounds_str = elem.attrib.get('bounds', '')
1371
+ clickable = elem.attrib.get('clickable', 'false').lower() == 'true'
1127
1372
 
1128
1373
  # 解析 bounds
1129
1374
  bounds = None
@@ -1133,36 +1378,113 @@ class BasicMobileToolsLite:
1133
1378
  if len(match) == 4:
1134
1379
  bounds = [int(x) for x in match]
1135
1380
 
1381
+ # 判断是否匹配
1382
+ is_match = False
1383
+ attr_type = None
1384
+ attr_value = None
1385
+
1136
1386
  # 精确匹配 text
1137
1387
  if elem_text == text:
1138
- return {'attr_type': 'text', 'attr_value': text, 'bounds': bounds}
1139
-
1388
+ is_match = True
1389
+ attr_type = 'text'
1390
+ attr_value = text
1140
1391
  # 精确匹配 content-desc
1141
- if elem_desc == text:
1142
- return {'attr_type': 'description', 'attr_value': text, 'bounds': bounds}
1143
-
1392
+ elif elem_desc == text:
1393
+ is_match = True
1394
+ attr_type = 'description'
1395
+ attr_value = text
1144
1396
  # 模糊匹配 text
1145
- if text in elem_text:
1146
- return {'attr_type': 'textContains', 'attr_value': text, 'bounds': bounds}
1147
-
1397
+ elif text in elem_text:
1398
+ is_match = True
1399
+ attr_type = 'textContains'
1400
+ attr_value = text
1148
1401
  # 模糊匹配 content-desc
1149
- if text in elem_desc:
1150
- return {'attr_type': 'descriptionContains', 'attr_value': text, 'bounds': bounds}
1402
+ elif text in elem_desc:
1403
+ is_match = True
1404
+ attr_type = 'descriptionContains'
1405
+ attr_value = text
1406
+
1407
+ if is_match and bounds:
1408
+ # 计算元素的中心点坐标
1409
+ center_x = (bounds[0] + bounds[2]) / 2
1410
+ center_y = (bounds[1] + bounds[3]) / 2
1411
+
1412
+ matched_elements.append({
1413
+ 'attr_type': attr_type,
1414
+ 'attr_value': attr_value,
1415
+ 'bounds': bounds,
1416
+ 'clickable': clickable,
1417
+ 'center_x': center_x,
1418
+ 'center_y': center_y
1419
+ })
1420
+
1421
+ if not matched_elements:
1422
+ return None
1423
+
1424
+ # 如果有位置信息,根据位置筛选
1425
+ if position and len(matched_elements) > 1:
1426
+ position_lower = position.lower()
1427
+
1428
+ # 根据位置信息排序
1429
+ if position_lower in ['top', 'upper', '上', '上方']:
1430
+ # 选择 y 坐标最小的(最上面的)
1431
+ matched_elements = sorted(matched_elements, key=lambda x: x['center_y'])
1432
+ elif position_lower in ['bottom', 'lower', '下', '下方', '底部']:
1433
+ # 选择 y 坐标最大的(最下面的)
1434
+ matched_elements = sorted(matched_elements, key=lambda x: x['center_y'], reverse=True)
1435
+ elif position_lower in ['left', '左', '左侧']:
1436
+ # 选择 x 坐标最小的(最左边的)
1437
+ matched_elements = sorted(matched_elements, key=lambda x: x['center_x'])
1438
+ elif position_lower in ['right', '右', '右侧']:
1439
+ # 选择 x 坐标最大的(最右边的)
1440
+ matched_elements = sorted(matched_elements, key=lambda x: x['center_x'], reverse=True)
1441
+ elif position_lower in ['middle', 'center', '中', '中间']:
1442
+ # 选择最接近屏幕中心的
1443
+ screen_mid_x = screen_width / 2
1444
+ screen_mid_y = screen_height / 2
1445
+ matched_elements = sorted(
1446
+ matched_elements,
1447
+ key=lambda x: abs(x['center_x'] - screen_mid_x) + abs(x['center_y'] - screen_mid_y)
1448
+ )
1449
+
1450
+ # 如果有位置信息,优先返回排序后的第一个元素(最符合位置要求的)
1451
+ # 如果没有位置信息,优先返回可点击的元素
1452
+ if position and matched_elements:
1453
+ # 有位置信息时,直接返回排序后的第一个(最符合位置要求的)
1454
+ first_match = matched_elements[0]
1455
+ return {
1456
+ 'attr_type': first_match['attr_type'],
1457
+ 'attr_value': first_match['attr_value'],
1458
+ 'bounds': first_match['bounds']
1459
+ }
1460
+
1461
+ # 没有位置信息时,优先返回可点击的元素
1462
+ for match in matched_elements:
1463
+ if match['clickable']:
1464
+ return {
1465
+ 'attr_type': match['attr_type'],
1466
+ 'attr_value': match['attr_value'],
1467
+ 'bounds': match['bounds']
1468
+ }
1469
+
1470
+ # 如果没有可点击的元素,直接返回第一个匹配元素的 bounds(使用坐标点击)
1471
+ if matched_elements:
1472
+ first_match = matched_elements[0]
1473
+ return {
1474
+ 'attr_type': first_match['attr_type'],
1475
+ 'attr_value': first_match['attr_value'],
1476
+ 'bounds': first_match['bounds']
1477
+ }
1151
1478
 
1152
1479
  return None
1153
- except Exception:
1480
+ except Exception as e:
1481
+ import traceback
1482
+ traceback.print_exc()
1154
1483
  return None
1155
1484
 
1156
1485
  def click_by_id(self, resource_id: str, index: int = 0) -> Dict:
1157
- """通过 resource-id 点击(支持点击第 N 个元素)
1158
-
1159
- Args:
1160
- resource_id: 元素的 resource-id
1161
- index: 第几个元素(从 0 开始),默认 0 表示第一个
1162
- """
1486
+ """通过 resource-id 点击"""
1163
1487
  try:
1164
- index_desc = f"[{index}]" if index > 0 else ""
1165
-
1166
1488
  if self._is_ios():
1167
1489
  ios_client = self._get_ios_client()
1168
1490
  if ios_client and hasattr(ios_client, 'wda'):
@@ -1170,31 +1492,31 @@ class BasicMobileToolsLite:
1170
1492
  if not elem.exists:
1171
1493
  elem = ios_client.wda(name=resource_id)
1172
1494
  if elem.exists:
1173
- # 获取所有匹配的元素
1174
1495
  elements = elem.find_elements()
1175
1496
  if index < len(elements):
1176
1497
  elements[index].click()
1177
1498
  time.sleep(0.3)
1178
- self._record_operation('click', element=f"{resource_id}{index_desc}", ref=resource_id, index=index)
1179
- return {"success": True, "message": f"✅ 点击成功: {resource_id}{index_desc}"}
1499
+ self._record_click('id', resource_id, element_desc=resource_id)
1500
+ return {"success": True}
1180
1501
  else:
1181
- return {"success": False, "message": f"❌ 索引超出范围: 找到 {len(elements)} 个元素,但请求索引 {index}"}
1182
- return {"success": False, "message": f" 元素不存在: {resource_id}"}
1502
+ return {"success": False, "msg": f"索引{index}超出范围(共{len(elements)}个)"}
1503
+ return {"success": False, "fallback": "vision", "msg": f"未找到ID'{resource_id}'"}
1504
+ else:
1505
+ return {"success": False, "msg": "iOS未初始化"}
1183
1506
  else:
1184
1507
  elem = self.client.u2(resourceId=resource_id)
1185
1508
  if elem.exists(timeout=0.5):
1186
- # 获取匹配元素数量
1187
1509
  count = elem.count
1188
1510
  if index < count:
1189
1511
  elem[index].click()
1190
1512
  time.sleep(0.3)
1191
- self._record_operation('click', element=f"{resource_id}{index_desc}", ref=resource_id, index=index)
1192
- return {"success": True, "message": f"✅ 点击成功: {resource_id}{index_desc}" + (f" (共 {count} 个)" if count > 1 else "")}
1513
+ self._record_click('id', resource_id, element_desc=resource_id)
1514
+ return {"success": True}
1193
1515
  else:
1194
- return {"success": False, "message": f"❌ 索引超出范围: 找到 {count} 个元素,但请求索引 {index}"}
1195
- return {"success": False, "message": f" 元素不存在: {resource_id}"}
1516
+ return {"success": False, "msg": f"索引{index}超出范围(共{count}个)"}
1517
+ return {"success": False, "fallback": "vision", "msg": f"未找到ID'{resource_id}'"}
1196
1518
  except Exception as e:
1197
- return {"success": False, "message": f"❌ 点击失败: {e}"}
1519
+ return {"success": False, "msg": str(e)}
1198
1520
 
1199
1521
  # ==================== 长按操作 ====================
1200
1522
 
@@ -1228,7 +1550,7 @@ class BasicMobileToolsLite:
1228
1550
  size = ios_client.wda.window_size()
1229
1551
  screen_width, screen_height = size[0], size[1]
1230
1552
  else:
1231
- return {"success": False, "message": "iOS 客户端未初始化"}
1553
+ return {"success": False, "msg": "iOS未初始化"}
1232
1554
  else:
1233
1555
  info = self.client.u2.info
1234
1556
  screen_width = info.get('displayWidth', 0)
@@ -1275,38 +1597,17 @@ class BasicMobileToolsLite:
1275
1597
  x_percent = round(x / screen_width * 100, 1) if screen_width > 0 else 0
1276
1598
  y_percent = round(y / screen_height * 100, 1) if screen_height > 0 else 0
1277
1599
 
1278
- # 记录操作
1279
- self._record_operation(
1280
- 'long_press',
1281
- x=x,
1282
- y=y,
1283
- x_percent=x_percent,
1284
- y_percent=y_percent,
1285
- duration=duration,
1286
- screen_width=screen_width,
1287
- screen_height=screen_height,
1288
- ref=f"coords_{x}_{y}"
1289
- )
1600
+ # 使用标准记录格式
1601
+ self._record_long_press('percent', f"{x_percent}%,{y_percent}%", duration,
1602
+ x_percent, y_percent, element_desc=f"坐标({x},{y})")
1290
1603
 
1291
1604
  if converted:
1292
1605
  if conversion_type == "crop_offset":
1293
- return {
1294
- "success": True,
1295
- "message": f"✅ 长按成功: ({x}, {y}) 持续 {duration}s\n"
1296
- f" 🔍 局部截图坐标转换: ({original_x},{original_y}) + 偏移({crop_offset_x},{crop_offset_y}) → ({x},{y})"
1297
- }
1606
+ return {"success": True}
1298
1607
  else:
1299
- return {
1300
- "success": True,
1301
- "message": f"✅ 长按成功: ({x}, {y}) 持续 {duration}s\n"
1302
- f" 📐 坐标已转换: ({original_x},{original_y}) → ({x},{y})\n"
1303
- f" 🖼️ 图片尺寸: {image_width}x{image_height} → 屏幕: {screen_width}x{screen_height}"
1304
- }
1608
+ return {"success": True}
1305
1609
  else:
1306
- return {
1307
- "success": True,
1308
- "message": f"✅ 长按成功: ({x}, {y}) 持续 {duration}s [相对位置: {x_percent}%, {y_percent}%]"
1309
- }
1610
+ return {"success": True}
1310
1611
  except Exception as e:
1311
1612
  return {"success": False, "message": f"❌ 长按失败: {e}"}
1312
1613
 
@@ -1335,14 +1636,14 @@ class BasicMobileToolsLite:
1335
1636
  size = ios_client.wda.window_size()
1336
1637
  width, height = size[0], size[1]
1337
1638
  else:
1338
- return {"success": False, "message": "iOS 客户端未初始化"}
1639
+ return {"success": False, "msg": "iOS未初始化"}
1339
1640
  else:
1340
1641
  info = self.client.u2.info
1341
1642
  width = info.get('displayWidth', 0)
1342
1643
  height = info.get('displayHeight', 0)
1343
1644
 
1344
1645
  if width == 0 or height == 0:
1345
- return {"success": False, "message": "无法获取屏幕尺寸"}
1646
+ return {"success": False, "msg": "无法获取屏幕尺寸"}
1346
1647
 
1347
1648
  # 第2步:百分比转像素坐标
1348
1649
  x = int(width * x_percent / 100)
@@ -1360,26 +1661,11 @@ class BasicMobileToolsLite:
1360
1661
 
1361
1662
  time.sleep(0.3)
1362
1663
 
1363
- # 第4步:记录操作
1364
- self._record_operation(
1365
- 'long_press',
1366
- x=x,
1367
- y=y,
1368
- x_percent=x_percent,
1369
- y_percent=y_percent,
1370
- duration=duration,
1371
- screen_width=width,
1372
- screen_height=height,
1373
- ref=f"percent_{x_percent}_{y_percent}"
1374
- )
1664
+ # 第4步:使用标准记录格式
1665
+ self._record_long_press('percent', f"{x_percent}%,{y_percent}%", duration,
1666
+ x_percent, y_percent, element_desc=f"百分比({x_percent}%,{y_percent}%)")
1375
1667
 
1376
- return {
1377
- "success": True,
1378
- "message": f"✅ 百分比长按成功: ({x_percent}%, {y_percent}%) → 像素({x}, {y}) 持续 {duration}s",
1379
- "screen_size": {"width": width, "height": height},
1380
- "percent": {"x": x_percent, "y": y_percent},
1381
- "pixel": {"x": x, "y": y},
1382
- "duration": duration
1668
+ return {"success": True
1383
1669
  }
1384
1670
  except Exception as e:
1385
1671
  return {"success": False, "message": f"❌ 百分比长按失败: {e}"}
@@ -1408,10 +1694,13 @@ class BasicMobileToolsLite:
1408
1694
  else:
1409
1695
  ios_client.wda.swipe(x, y, x, y, duration=duration)
1410
1696
  time.sleep(0.3)
1411
- self._record_operation('long_press', element=text, duration=duration, ref=text)
1412
- return {"success": True, "message": f"✅ 长按成功: '{text}' 持续 {duration}s"}
1413
- return {"success": False, "message": f"❌ 文本不存在: {text}"}
1697
+ self._record_long_press('text', text, duration, element_desc=text, locator_attr='text')
1698
+ return {"success": True}
1699
+ return {"success": False, "msg": f"未找到'{text}'"}
1414
1700
  else:
1701
+ # 获取屏幕尺寸用于计算百分比
1702
+ screen_width, screen_height = self.client.u2.window_size()
1703
+
1415
1704
  # 先查 XML 树,找到元素
1416
1705
  found_elem = self._find_element_in_tree(text)
1417
1706
 
@@ -1420,6 +1709,14 @@ class BasicMobileToolsLite:
1420
1709
  attr_value = found_elem['attr_value']
1421
1710
  bounds = found_elem.get('bounds')
1422
1711
 
1712
+ # 计算百分比坐标作为兜底
1713
+ x_pct, y_pct = 0, 0
1714
+ if bounds:
1715
+ cx = (bounds[0] + bounds[2]) // 2
1716
+ cy = (bounds[1] + bounds[3]) // 2
1717
+ x_pct = round(cx / screen_width * 100, 1)
1718
+ y_pct = round(cy / screen_height * 100, 1)
1719
+
1423
1720
  # 根据找到的属性类型,使用对应的选择器
1424
1721
  if attr_type == 'text':
1425
1722
  elem = self.client.u2(text=attr_value)
@@ -1435,8 +1732,9 @@ class BasicMobileToolsLite:
1435
1732
  if elem and elem.exists(timeout=1):
1436
1733
  elem.long_click(duration=duration)
1437
1734
  time.sleep(0.3)
1438
- self._record_operation('long_press', element=text, duration=duration, ref=f"{attr_type}:{attr_value}")
1439
- return {"success": True, "message": f"✅ 长按成功({attr_type}): '{text}' 持续 {duration}s"}
1735
+ self._record_long_press('text', attr_value, duration, x_pct, y_pct,
1736
+ element_desc=text, locator_attr=attr_type)
1737
+ return {"success": True}
1440
1738
 
1441
1739
  # 如果选择器失败,用坐标兜底
1442
1740
  if bounds:
@@ -1444,10 +1742,11 @@ class BasicMobileToolsLite:
1444
1742
  y = (bounds[1] + bounds[3]) // 2
1445
1743
  self.client.u2.long_click(x, y, duration=duration)
1446
1744
  time.sleep(0.3)
1447
- self._record_operation('long_press', element=text, x=x, y=y, duration=duration, ref=f"coords:{x},{y}")
1448
- return {"success": True, "message": f"✅ 长按成功(坐标兜底): '{text}' @ ({x},{y}) 持续 {duration}s"}
1745
+ self._record_long_press('percent', f"{x_pct}%,{y_pct}%", duration, x_pct, y_pct,
1746
+ element_desc=text)
1747
+ return {"success": True}
1449
1748
 
1450
- return {"success": False, "message": f"❌ 文本不存在: {text}"}
1749
+ return {"success": False, "msg": f"未找到'{text}'"}
1451
1750
  except Exception as e:
1452
1751
  return {"success": False, "message": f"❌ 长按失败: {e}"}
1453
1752
 
@@ -1474,17 +1773,17 @@ class BasicMobileToolsLite:
1474
1773
  else:
1475
1774
  ios_client.wda.swipe(x, y, x, y, duration=duration)
1476
1775
  time.sleep(0.3)
1477
- self._record_operation('long_press', element=resource_id, duration=duration, ref=resource_id)
1478
- return {"success": True, "message": f"✅ 长按成功: {resource_id} 持续 {duration}s"}
1479
- return {"success": False, "message": f"❌ 元素不存在: {resource_id}"}
1776
+ self._record_long_press('id', resource_id, duration, element_desc=resource_id)
1777
+ return {"success": True}
1778
+ return {"success": False, "msg": f"未找到'{resource_id}'"}
1480
1779
  else:
1481
1780
  elem = self.client.u2(resourceId=resource_id)
1482
1781
  if elem.exists(timeout=0.5):
1483
1782
  elem.long_click(duration=duration)
1484
1783
  time.sleep(0.3)
1485
- self._record_operation('long_press', element=resource_id, duration=duration, ref=resource_id)
1784
+ self._record_long_press('id', resource_id, duration, element_desc=resource_id)
1486
1785
  return {"success": True, "message": f"✅ 长按成功: {resource_id} 持续 {duration}s"}
1487
- return {"success": False, "message": f"❌ 元素不存在: {resource_id}"}
1786
+ return {"success": False, "msg": f"未找到'{resource_id}'"}
1488
1787
  except Exception as e:
1489
1788
  return {"success": False, "message": f"❌ 长按失败: {e}"}
1490
1789
 
@@ -1509,8 +1808,29 @@ class BasicMobileToolsLite:
1509
1808
  if elem.exists:
1510
1809
  elem.set_text(text)
1511
1810
  time.sleep(0.3)
1512
- self._record_operation('input', element=resource_id, ref=resource_id, text=text)
1513
- return {"success": True, "message": f"✅ 输入成功: '{text}'"}
1811
+ self._record_input(text, 'id', resource_id)
1812
+
1813
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1814
+ app_check = self._check_app_switched()
1815
+ return_result = None
1816
+ if app_check['switched']:
1817
+ return_result = self._return_to_target_app()
1818
+
1819
+ msg = f"✅ 输入成功: '{text}'"
1820
+ if app_check['switched']:
1821
+ msg += f"\n{app_check['message']}"
1822
+ if return_result:
1823
+ if return_result['success']:
1824
+ msg += f"\n{return_result['message']}"
1825
+ else:
1826
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1827
+
1828
+ return {
1829
+ "success": True,
1830
+ "message": msg,
1831
+ "app_check": app_check,
1832
+ "return_to_app": return_result
1833
+ }
1514
1834
  return {"success": False, "message": f"❌ 输入框不存在: {resource_id}"}
1515
1835
  else:
1516
1836
  elements = self.client.u2(resourceId=resource_id)
@@ -1523,8 +1843,29 @@ class BasicMobileToolsLite:
1523
1843
  if count == 1:
1524
1844
  elements.set_text(text)
1525
1845
  time.sleep(0.3)
1526
- self._record_operation('input', element=resource_id, ref=resource_id, text=text)
1527
- return {"success": True, "message": f"✅ 输入成功: '{text}'"}
1846
+ self._record_input(text, 'id', resource_id)
1847
+
1848
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1849
+ app_check = self._check_app_switched()
1850
+ return_result = None
1851
+ if app_check['switched']:
1852
+ return_result = self._return_to_target_app()
1853
+
1854
+ msg = f"✅ 输入成功: '{text}'"
1855
+ if app_check['switched']:
1856
+ msg += f"\n{app_check['message']}"
1857
+ if return_result:
1858
+ if return_result['success']:
1859
+ msg += f"\n{return_result['message']}"
1860
+ else:
1861
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1862
+
1863
+ return {
1864
+ "success": True,
1865
+ "message": msg,
1866
+ "app_check": app_check,
1867
+ "return_to_app": return_result
1868
+ }
1528
1869
 
1529
1870
  # 多个相同 ID(<=5个),尝试智能选择
1530
1871
  if count <= 5:
@@ -1536,15 +1877,57 @@ class BasicMobileToolsLite:
1536
1877
  if info.get('editable') or info.get('focusable'):
1537
1878
  elem.set_text(text)
1538
1879
  time.sleep(0.3)
1539
- self._record_operation('input', element=resource_id, ref=resource_id, text=text)
1540
- return {"success": True, "message": f"✅ 输入成功: '{text}'"}
1880
+ self._record_input(text, 'id', resource_id)
1881
+
1882
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1883
+ app_check = self._check_app_switched()
1884
+ return_result = None
1885
+ if app_check['switched']:
1886
+ return_result = self._return_to_target_app()
1887
+
1888
+ msg = f"✅ 输入成功: '{text}'"
1889
+ if app_check['switched']:
1890
+ msg += f"\n{app_check['message']}"
1891
+ if return_result:
1892
+ if return_result['success']:
1893
+ msg += f"\n{return_result['message']}"
1894
+ else:
1895
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1896
+
1897
+ return {
1898
+ "success": True,
1899
+ "message": msg,
1900
+ "app_check": app_check,
1901
+ "return_to_app": return_result
1902
+ }
1541
1903
  except:
1542
1904
  continue
1543
1905
  # 没找到可编辑的,用第一个
1544
1906
  elements[0].set_text(text)
1545
1907
  time.sleep(0.3)
1546
- self._record_operation('input', element=resource_id, ref=resource_id, text=text)
1547
- return {"success": True, "message": f"✅ 输入成功: '{text}'"}
1908
+ self._record_input(text, 'id', resource_id)
1909
+
1910
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1911
+ app_check = self._check_app_switched()
1912
+ return_result = None
1913
+ if app_check['switched']:
1914
+ return_result = self._return_to_target_app()
1915
+
1916
+ msg = f"✅ 输入成功: '{text}'"
1917
+ if app_check['switched']:
1918
+ msg += f"\n{app_check['message']}"
1919
+ if return_result:
1920
+ if return_result['success']:
1921
+ msg += f"\n{return_result['message']}"
1922
+ else:
1923
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1924
+
1925
+ return {
1926
+ "success": True,
1927
+ "message": msg,
1928
+ "app_check": app_check,
1929
+ "return_to_app": return_result
1930
+ }
1548
1931
 
1549
1932
  # ID 不可靠(不存在或太多),改用 EditText 类型定位
1550
1933
  edit_texts = self.client.u2(className='android.widget.EditText')
@@ -1553,8 +1936,29 @@ class BasicMobileToolsLite:
1553
1936
  if et_count == 1:
1554
1937
  edit_texts.set_text(text)
1555
1938
  time.sleep(0.3)
1556
- self._record_operation('input', element='EditText', ref='EditText', text=text)
1557
- return {"success": True, "message": f"✅ 输入成功: '{text}' (通过 EditText 定位)"}
1939
+ self._record_input(text, 'class', 'EditText')
1940
+
1941
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1942
+ app_check = self._check_app_switched()
1943
+ return_result = None
1944
+ if app_check['switched']:
1945
+ return_result = self._return_to_target_app()
1946
+
1947
+ msg = f"✅ 输入成功: '{text}' (通过 EditText 定位)"
1948
+ if app_check['switched']:
1949
+ msg += f"\n{app_check['message']}"
1950
+ if return_result:
1951
+ if return_result['success']:
1952
+ msg += f"\n{return_result['message']}"
1953
+ else:
1954
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1955
+
1956
+ return {
1957
+ "success": True,
1958
+ "message": msg,
1959
+ "app_check": app_check,
1960
+ "return_to_app": return_result
1961
+ }
1558
1962
 
1559
1963
  # 多个 EditText,选择最靠上的
1560
1964
  best_elem = None
@@ -1572,9 +1976,30 @@ class BasicMobileToolsLite:
1572
1976
  if best_elem:
1573
1977
  best_elem.set_text(text)
1574
1978
  time.sleep(0.3)
1575
- self._record_operation('input', element='EditText', ref='EditText', text=text)
1576
- return {"success": True, "message": f"✅ 输入成功: '{text}' (通过 EditText 定位,选择最顶部的)"}
1577
-
1979
+ self._record_input(text, 'class', 'EditText')
1980
+
1981
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
1982
+ app_check = self._check_app_switched()
1983
+ return_result = None
1984
+ if app_check['switched']:
1985
+ return_result = self._return_to_target_app()
1986
+
1987
+ msg = f"✅ 输入成功: '{text}' (通过 EditText 定位,选择最顶部的)"
1988
+ if app_check['switched']:
1989
+ msg += f"\n{app_check['message']}"
1990
+ if return_result:
1991
+ if return_result['success']:
1992
+ msg += f"\n{return_result['message']}"
1993
+ else:
1994
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
1995
+
1996
+ return {
1997
+ "success": True,
1998
+ "message": msg,
1999
+ "app_check": app_check,
2000
+ "return_to_app": return_result
2001
+ }
2002
+
1578
2003
  return {"success": False, "message": f"❌ 输入框不存在: {resource_id}"}
1579
2004
 
1580
2005
  except Exception as e:
@@ -1615,17 +2040,32 @@ class BasicMobileToolsLite:
1615
2040
  x_percent = round(x / screen_width * 100, 1) if screen_width > 0 else 0
1616
2041
  y_percent = round(y / screen_height * 100, 1) if screen_height > 0 else 0
1617
2042
 
1618
- self._record_operation(
1619
- 'input',
1620
- x=x,
1621
- y=y,
1622
- x_percent=x_percent,
1623
- y_percent=y_percent,
1624
- ref=f"coords_{x}_{y}",
1625
- text=text
1626
- )
2043
+ # 使用标准记录格式
2044
+ self._record_input(text, 'percent', f"{x_percent}%,{y_percent}%", x_percent, y_percent)
1627
2045
 
1628
- return {"success": True, "message": f"✅ 输入成功: ({x}, {y}) [相对位置: {x_percent}%, {y_percent}%] -> '{text}'"}
2046
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
2047
+ app_check = self._check_app_switched()
2048
+ return_result = None
2049
+
2050
+ if app_check['switched']:
2051
+ # 应用已跳转,尝试返回目标应用
2052
+ return_result = self._return_to_target_app()
2053
+
2054
+ msg = f"✅ 输入成功: ({x}, {y}) [相对位置: {x_percent}%, {y_percent}%] -> '{text}'"
2055
+ if app_check['switched']:
2056
+ msg += f"\n{app_check['message']}"
2057
+ if return_result:
2058
+ if return_result['success']:
2059
+ msg += f"\n{return_result['message']}"
2060
+ else:
2061
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
2062
+
2063
+ return {
2064
+ "success": True,
2065
+ "message": msg,
2066
+ "app_check": app_check,
2067
+ "return_to_app": return_result
2068
+ }
1629
2069
  except Exception as e:
1630
2070
  return {"success": False, "message": f"❌ 输入失败: {e}"}
1631
2071
 
@@ -1646,7 +2086,7 @@ class BasicMobileToolsLite:
1646
2086
  size = ios_client.wda.window_size()
1647
2087
  width, height = size[0], size[1]
1648
2088
  else:
1649
- return {"success": False, "message": "iOS 客户端未初始化"}
2089
+ return {"success": False, "msg": "iOS未初始化"}
1650
2090
  else:
1651
2091
  width, height = self.client.u2.window_size()
1652
2092
 
@@ -1684,13 +2124,16 @@ class BasicMobileToolsLite:
1684
2124
  else:
1685
2125
  self.client.u2.swipe(x1, y1, x2, y2, duration=0.5)
1686
2126
 
1687
- # 记录操作信息
1688
- record_info = {'direction': direction}
1689
- if y is not None:
1690
- record_info['y'] = y
1691
- if y_percent is not None:
1692
- record_info['y_percent'] = y_percent
1693
- self._record_operation('swipe', **record_info)
2127
+ # 使用标准记录格式
2128
+ self._record_swipe(direction)
2129
+
2130
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转则自动返回目标应用
2131
+ app_check = self._check_app_switched()
2132
+ return_result = None
2133
+
2134
+ if app_check['switched']:
2135
+ # 应用已跳转,尝试返回目标应用
2136
+ return_result = self._return_to_target_app()
1694
2137
 
1695
2138
  # 构建返回消息
1696
2139
  msg = f"✅ 滑动成功: {direction}"
@@ -1700,7 +2143,21 @@ class BasicMobileToolsLite:
1700
2143
  elif y is not None:
1701
2144
  msg += f" (高度: {y}px)"
1702
2145
 
1703
- return {"success": True, "message": msg}
2146
+ # 如果检测到应用跳转,添加警告和返回结果
2147
+ if app_check['switched']:
2148
+ msg += f"\n{app_check['message']}"
2149
+ if return_result:
2150
+ if return_result['success']:
2151
+ msg += f"\n{return_result['message']}"
2152
+ else:
2153
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
2154
+
2155
+ return {
2156
+ "success": True,
2157
+ "message": msg,
2158
+ "app_check": app_check,
2159
+ "return_to_app": return_result
2160
+ }
1704
2161
  except Exception as e:
1705
2162
  return {"success": False, "message": f"❌ 滑动失败: {e}"}
1706
2163
 
@@ -1725,22 +2182,22 @@ class BasicMobileToolsLite:
1725
2182
  ios_client.wda.send_keys('\n')
1726
2183
  elif ios_key == 'home':
1727
2184
  ios_client.wda.home()
1728
- return {"success": True, "message": f"✅ 按键成功: {key}"}
1729
- return {"success": False, "message": f"iOS 不支持: {key}"}
2185
+ return {"success": True}
2186
+ return {"success": False, "msg": f"iOS不支持{key}"}
1730
2187
  else:
1731
2188
  keycode = key_map.get(key.lower())
1732
2189
  if keycode:
1733
2190
  self.client.u2.shell(f'input keyevent {keycode}')
1734
- self._record_operation('press_key', key=key)
1735
- return {"success": True, "message": f"✅ 按键成功: {key}"}
1736
- return {"success": False, "message": f"❌ 不支持的按键: {key}"}
2191
+ self._record_key(key)
2192
+ return {"success": True}
2193
+ return {"success": False, "msg": f"不支持按键{key}"}
1737
2194
  except Exception as e:
1738
2195
  return {"success": False, "message": f"❌ 按键失败: {e}"}
1739
2196
 
1740
2197
  def wait(self, seconds: float) -> Dict:
1741
2198
  """等待指定时间"""
1742
2199
  time.sleep(seconds)
1743
- return {"success": True, "message": f"✅ 已等待 {seconds} 秒"}
2200
+ return {"success": True}
1744
2201
 
1745
2202
  # ==================== 应用管理 ====================
1746
2203
 
@@ -1756,12 +2213,20 @@ class BasicMobileToolsLite:
1756
2213
 
1757
2214
  await asyncio.sleep(2)
1758
2215
 
2216
+ # 记录目标应用包名(用于后续监测应用跳转)
2217
+ self.target_package = package_name
2218
+
2219
+ # 验证是否成功启动到目标应用
2220
+ current = self._get_current_package()
2221
+ if current and current != package_name:
2222
+ return {
2223
+ "success": False,
2224
+ "message": f"❌ 启动失败:当前应用为 {current},期望 {package_name}"
2225
+ }
2226
+
1759
2227
  self._record_operation('launch_app', package_name=package_name)
1760
2228
 
1761
- return {
1762
- "success": True,
1763
- "message": f"✅ 已启动: {package_name}\n💡 建议等待 2-3 秒让页面加载"
1764
- }
2229
+ return {"success": True}
1765
2230
  except Exception as e:
1766
2231
  return {"success": False, "message": f"❌ 启动失败: {e}"}
1767
2232
 
@@ -1774,9 +2239,9 @@ class BasicMobileToolsLite:
1774
2239
  ios_client.wda.app_terminate(package_name)
1775
2240
  else:
1776
2241
  self.client.u2.app_stop(package_name)
1777
- return {"success": True, "message": f"✅ 已终止: {package_name}"}
2242
+ return {"success": True}
1778
2243
  except Exception as e:
1779
- return {"success": False, "message": f"❌ 终止失败: {e}"}
2244
+ return {"success": False, "msg": str(e)}
1780
2245
 
1781
2246
  def list_apps(self, filter_keyword: str = "") -> Dict:
1782
2247
  """列出已安装应用"""
@@ -1850,7 +2315,7 @@ class BasicMobileToolsLite:
1850
2315
  # ==================== 辅助工具 ====================
1851
2316
 
1852
2317
  def list_elements(self) -> List[Dict]:
1853
- """列出页面元素"""
2318
+ """列出页面元素(已优化:过滤排版容器,保留功能控件)"""
1854
2319
  try:
1855
2320
  if self._is_ios():
1856
2321
  ios_client = self._get_ios_client()
@@ -1858,23 +2323,271 @@ class BasicMobileToolsLite:
1858
2323
  return ios_client.list_elements()
1859
2324
  return [{"error": "iOS 暂不支持元素列表,建议使用截图"}]
1860
2325
  else:
1861
- xml_string = self._get_full_hierarchy()
2326
+ xml_string = self.client.u2.dump_hierarchy(compressed=False)
1862
2327
  elements = self.client.xml_parser.parse(xml_string)
1863
2328
 
2329
+ # 功能控件类型(需要保留)
2330
+ FUNCTIONAL_WIDGETS = {
2331
+ 'TextView', 'Text', 'Label', # 文本类
2332
+ 'ImageView', 'Image', 'ImageButton', # 图片类
2333
+ 'Button', 'CheckBox', 'RadioButton', 'Switch', # 交互类
2334
+ 'SeekBar', 'ProgressBar', 'RatingBar', # 滑动/进度类
2335
+ 'EditText', 'TextInput', # 输入类
2336
+ 'VideoView', 'WebView', # 特殊功能类
2337
+ 'RecyclerView', 'ListView', 'GridView', # 列表类
2338
+ 'ScrollView', 'NestedScrollView', # 滚动容器(有实际功能)
2339
+ }
2340
+
2341
+ # 容器控件类型(需要过滤,除非有业务ID)
2342
+ CONTAINER_WIDGETS = {
2343
+ 'FrameLayout', 'LinearLayout', 'RelativeLayout',
2344
+ 'ViewGroup', 'ConstraintLayout', 'CoordinatorLayout',
2345
+ 'CardView', 'View', # 基础View也可能只是容器
2346
+ }
2347
+
2348
+ # 装饰类控件关键词(resource_id中包含这些关键词的通常可以过滤)
2349
+ # 支持匹配如 qylt_item_short_video_shadow_one 这样的命名
2350
+ DECORATIVE_KEYWORDS = {
2351
+ 'shadow', 'divider', 'separator', 'line', 'border',
2352
+ 'background', 'bg_', '_bg', 'decorative', 'decoration',
2353
+ '_shadow', 'shadow_', '_divider', 'divider_', '_line', 'line_'
2354
+ }
2355
+
2356
+ # Token 优化:构建精简元素(只返回非空字段)
2357
+ def build_compact_element(resource_id, text, content_desc, bounds, likely_click, class_name):
2358
+ """只返回有值的字段,节省 token"""
2359
+ item = {}
2360
+ if resource_id:
2361
+ # 精简 resource_id,只保留最后一段
2362
+ item['id'] = resource_id.split('/')[-1] if '/' in resource_id else resource_id
2363
+ if text:
2364
+ item['text'] = text
2365
+ if content_desc:
2366
+ item['desc'] = content_desc
2367
+ if bounds:
2368
+ item['bounds'] = bounds
2369
+ if likely_click:
2370
+ item['click'] = True # 启发式判断可点击
2371
+ # class 精简:只保留关键类型
2372
+ if class_name in ('EditText', 'TextInput', 'Button', 'ImageButton', 'CheckBox', 'Switch'):
2373
+ item['type'] = class_name
2374
+ return item
2375
+
1864
2376
  result = []
1865
2377
  for elem in elements:
1866
- if elem.get('clickable') or elem.get('focusable'):
1867
- result.append({
1868
- 'resource_id': elem.get('resource_id', ''),
1869
- 'text': elem.get('text', ''),
1870
- 'content_desc': elem.get('content_desc', ''),
1871
- 'bounds': elem.get('bounds', ''),
1872
- 'clickable': elem.get('clickable', False)
1873
- })
2378
+ # 获取元素属性
2379
+ class_name = elem.get('class_name', '')
2380
+ resource_id = elem.get('resource_id', '').strip()
2381
+ text = elem.get('text', '').strip()
2382
+ content_desc = elem.get('content_desc', '').strip()
2383
+ bounds = elem.get('bounds', '')
2384
+ clickable = elem.get('clickable', False)
2385
+ focusable = elem.get('focusable', False)
2386
+ scrollable = elem.get('scrollable', False)
2387
+ enabled = elem.get('enabled', True)
2388
+
2389
+ # 1. 过滤 bounds="[0,0][0,0]" 的视觉隐藏元素
2390
+ if bounds == '[0,0][0,0]':
2391
+ continue
2392
+
2393
+ # 2. 检查是否是功能控件(直接保留)
2394
+ if class_name in FUNCTIONAL_WIDGETS:
2395
+ # 使用启发式判断可点击性(替代不准确的 clickable 属性)
2396
+ likely_click = self._is_likely_clickable(class_name, resource_id, text, content_desc, clickable, bounds)
2397
+ item = build_compact_element(resource_id, text, content_desc, bounds, likely_click, class_name)
2398
+ if item:
2399
+ result.append(item)
2400
+ continue
2401
+
2402
+ # 3. 检查是否是容器控件
2403
+ if class_name in CONTAINER_WIDGETS:
2404
+ # 容器控件需要检查是否有业务相关的ID
2405
+ has_business_id = self._has_business_id(resource_id)
2406
+ if not has_business_id:
2407
+ # 无业务ID的容器控件,检查是否有其他有意义属性
2408
+ if not (clickable or focusable or scrollable or text or content_desc):
2409
+ # 所有属性都是默认值,过滤掉
2410
+ continue
2411
+ # 有业务ID或其他有意义属性,保留
2412
+ likely_click = self._is_likely_clickable(class_name, resource_id, text, content_desc, clickable, bounds)
2413
+ item = build_compact_element(resource_id, text, content_desc, bounds, likely_click, class_name)
2414
+ if item:
2415
+ result.append(item)
2416
+ continue
2417
+
2418
+ # 4. 检查是否是装饰类控件
2419
+ if resource_id:
2420
+ resource_id_lower = resource_id.lower()
2421
+ if any(keyword in resource_id_lower for keyword in DECORATIVE_KEYWORDS):
2422
+ # 是装饰类控件,且没有交互属性,过滤掉
2423
+ if not (clickable or focusable or text or content_desc):
2424
+ continue
2425
+
2426
+ # 5. 检查是否所有属性均为默认值
2427
+ if not (text or content_desc or resource_id or clickable or focusable or scrollable):
2428
+ # 所有属性都是默认值,过滤掉
2429
+ continue
2430
+
2431
+ # 6. 其他情况:有意义的元素保留
2432
+ likely_click = self._is_likely_clickable(class_name, resource_id, text, content_desc, clickable, bounds)
2433
+ item = build_compact_element(resource_id, text, content_desc, bounds, likely_click, class_name)
2434
+ if item:
2435
+ result.append(item)
2436
+
2437
+ # Token 优化:可选限制返回元素数量(默认不限制,确保准确度)
2438
+ if TOKEN_OPTIMIZATION and MAX_ELEMENTS > 0 and len(result) > MAX_ELEMENTS:
2439
+ # 仅在用户明确设置 MAX_ELEMENTS_RETURN 时才截断
2440
+ truncated = result[:MAX_ELEMENTS]
2441
+ truncated.append({
2442
+ '_truncated': True,
2443
+ '_total': len(result),
2444
+ '_shown': MAX_ELEMENTS
2445
+ })
2446
+ return truncated
2447
+
1874
2448
  return result
1875
2449
  except Exception as e:
1876
2450
  return [{"error": f"获取元素失败: {e}"}]
1877
2451
 
2452
def _get_page_texts(self, max_count: int = 15) -> List[str]:
    """Collect a short, de-duplicated list of texts shown on the current page.

    Intended as a cheap fingerprint of the page (e.g. to confirm that a click
    changed what is displayed). Texts are returned in the order they are first
    encountered, so the same page always yields the same list.

    Args:
        max_count: Maximum number of texts to return. Values <= 0 yield ``[]``.

    Returns:
        Up to ``max_count`` distinct strings. On iOS these are the ``name`` (or
        ``label``) of static-text elements with 2-49 characters; on Android
        they are ``text`` / ``content-desc`` attributes with 2-30 characters
        that are not purely numeric. Any failure results in ``[]``.
    """
    if max_count <= 0:
        return []

    # A dict keeps insertion order, giving ordered de-duplication.
    seen: Dict[str, None] = {}
    try:
        if self._is_ios():
            ios_client = self._get_ios_client()
            if not (ios_client and hasattr(ios_client, 'wda')):
                return []
            elements = ios_client.wda(type='XCUIElementTypeStaticText').find_elements()
            for elem in elements[:50]:  # cap the number of elements inspected
                try:
                    name = elem.name or elem.label
                    usable = bool(name) and 1 < len(name) < 50
                except Exception:
                    # Skip an element whose attributes cannot be read, but keep scanning.
                    continue
                if usable:
                    seen.setdefault(name, None)
                    if len(seen) >= max_count:
                        break
            return list(seen)

        # Android: scan the compressed hierarchy dump.
        import xml.etree.ElementTree as ET
        root = ET.fromstring(self.client.u2.dump_hierarchy(compressed=True))
        for elem in root.iter():
            for raw in (elem.get('text', ''), elem.get('content-desc', '')):
                value = raw.strip()
                # Keep only meaningful snippets: 2-30 chars and not just digits.
                if value and 2 <= len(value) <= 30 and not value.isdigit():
                    seen.setdefault(value, None)
            if len(seen) >= max_count:
                break  # enough texts collected
        return list(seen)[:max_count]
    except Exception:
        # Best-effort helper: callers treat "no texts" and "failed" alike.
        return []
2497
+
2498
def _has_business_id(self, resource_id: str) -> bool:
    """Tell whether ``resource_id`` looks like an app-defined (business) id.

    An id counts as a business id when it is non-empty and matches none of the
    patterns below, which describe framework ids or auto-generated names.

    Args:
        resource_id: The element's resource id (may be empty or ``None``).

    Returns:
        ``True`` for a non-empty id that matches no excluded pattern,
        ``False`` otherwise.
    """
    import re

    if not resource_id:
        return False

    # Ids considered non-business. The first pattern rejects every framework
    # id, including android:id/content and android:id/statusBarBackground, so
    # no separate list of system ids is needed.
    auto_generated_patterns = (
        r'^android:id/',   # framework ids
        r':id/\d+',        # id that starts with digits
        r':id/view_\d+',   # view_<number>
        r':id/item_\d+',   # item_<number>
    )
    return not any(re.search(pattern, resource_id) for pattern in auto_generated_patterns)
2528
+
2529
def _is_likely_clickable(self, class_name: str, resource_id: str, text: str,
                         content_desc: str, clickable: bool, bounds: str) -> bool:
    """Heuristically decide whether an element is probably clickable.

    The reported ``clickable`` flag is trusted when it is true; otherwise a
    series of rules based on the widget type, id, description and size is
    applied. The rules are evaluated in order and the first match wins.

    Args:
        class_name: Widget class name as compared against the short names below.
        resource_id: Element resource id (may be empty).
        text: Element text. Currently not used by any rule.
        content_desc: Accessibility description (may be empty).
        clickable: The ``clickable`` attribute reported for the element.
        bounds: Bounds string in ``[x1,y1][x2,y2]`` form (may be empty).

    Returns:
        ``True`` if any rule matches, ``False`` otherwise.
    """
    import re

    # Rule 1: an element that reports clickable=true is clickable.
    if clickable:
        return True

    # Rule 2: widget types that are interactive by nature
    # (input fields are included because tapping them gives focus).
    typically_clickable = {
        'Button', 'ImageButton', 'CheckBox', 'RadioButton', 'Switch',
        'ToggleButton', 'FloatingActionButton', 'Chip', 'TabView',
        'EditText', 'TextInput',
    }
    if class_name in typically_clickable:
        return True

    # Rule 3: the resource id contains a word that suggests an action target.
    if resource_id:
        click_keywords = (
            'btn', 'button', 'click', 'tap', 'submit', 'confirm',
            'cancel', 'close', 'back', 'next', 'prev', 'more',
            'action', 'link', 'menu', 'tab', 'item', 'cell',
            'card', 'avatar', 'icon', 'entry', 'option', 'arrow',
        )
        id_lower = resource_id.lower()
        if any(keyword in id_lower for keyword in click_keywords):
            return True

    # Rule 4: the description hints at an action.
    if content_desc:
        click_hints = ('点击', '按钮', '关闭', '返回', '更多', 'click', 'tap', 'button', 'close')
        desc_lower = content_desc.lower()
        if any(hint in desc_lower for hint in click_hints):
            return True

    # Rule 5: a small image (20-100 px on both sides) that carries an id or a
    # description is treated as an icon button. Images without either are not
    # considered because they produce too many false positives.
    if class_name in ('ImageView', 'Image') and (resource_id or content_desc) and bounds:
        match = re.match(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
        if match:
            x1, y1, x2, y2 = map(int, match.groups())
            if 20 <= x2 - x1 <= 100 and 20 <= y2 - y1 <= 100:
                return True

    # Plain text views get no rule of their own (too many false positives);
    # they only qualify through the rules above.
    return False
2590
+
1878
2591
  def find_close_button(self) -> Dict:
1879
2592
  """智能查找关闭按钮(不点击,只返回位置)
1880
2593
 
@@ -1888,17 +2601,25 @@ class BasicMobileToolsLite:
1888
2601
  import re
1889
2602
 
1890
2603
  if self._is_ios():
1891
- return {"success": False, "message": "iOS 暂不支持,请使用截图+坐标点击"}
2604
+ return {"success": False, "msg": "iOS暂不支持"}
1892
2605
 
1893
2606
  # 获取屏幕尺寸
1894
2607
  screen_width = self.client.u2.info.get('displayWidth', 720)
1895
2608
  screen_height = self.client.u2.info.get('displayHeight', 1280)
1896
2609
 
1897
- # 获取元素列表(使用完整 UI 层级)
1898
- xml_string = self._get_full_hierarchy()
2610
+ # 获取元素列表
2611
+ xml_string = self.client.u2.dump_hierarchy(compressed=False)
1899
2612
  import xml.etree.ElementTree as ET
1900
2613
  root = ET.fromstring(xml_string)
1901
2614
 
2615
+ # 🔴 先检测是否有弹窗,避免误识别普通页面的按钮
2616
+ popup_bounds, popup_confidence = self._detect_popup_with_confidence(
2617
+ root, screen_width, screen_height
2618
+ )
2619
+
2620
+ if popup_bounds is None or popup_confidence < 0.5:
2621
+ return {"success": True, "popup": False}
2622
+
1902
2623
  # 关闭按钮特征
1903
2624
  close_texts = ['×', 'X', 'x', '关闭', '取消', 'close', 'Close', '跳过', '知道了', '我知道了']
1904
2625
  candidates = []
@@ -2000,27 +2721,16 @@ class BasicMobileToolsLite:
2000
2721
  candidates.sort(key=lambda x: x['score'], reverse=True)
2001
2722
  best = candidates[0]
2002
2723
 
2724
+ # Token 优化:只返回最必要的信息
2003
2725
  return {
2004
2726
  "success": True,
2005
- "message": f"✅ 找到可能的关闭按钮",
2006
- "best_candidate": {
2007
- "reason": best['reason'],
2008
- "center": {"x": best['center_x'], "y": best['center_y']},
2009
- "percent": {"x": best['x_percent'], "y": best['y_percent']},
2010
- "bounds": best['bounds'],
2011
- "size": best['size'],
2012
- "score": best['score']
2013
- },
2014
- "click_command": f"mobile_click_by_percent({best['x_percent']}, {best['y_percent']})",
2015
- "other_candidates": [
2016
- {"reason": c['reason'], "percent": f"({c['x_percent']}%, {c['y_percent']}%)", "score": c['score']}
2017
- for c in candidates[1:4]
2018
- ] if len(candidates) > 1 else [],
2019
- "screen_size": {"width": screen_width, "height": screen_height}
2727
+ "popup": True,
2728
+ "close": {"x": best['x_percent'], "y": best['y_percent']},
2729
+ "cmd": f"click_by_percent({best['x_percent']},{best['y_percent']})"
2020
2730
  }
2021
2731
 
2022
2732
  except Exception as e:
2023
- return {"success": False, "message": f"❌ 查找关闭按钮失败: {e}"}
2733
+ return {"success": False, "msg": str(e)}
2024
2734
 
2025
2735
  def close_popup(self) -> Dict:
2026
2736
  """智能关闭弹窗(改进版)
@@ -2043,13 +2753,13 @@ class BasicMobileToolsLite:
2043
2753
 
2044
2754
  # 获取屏幕尺寸
2045
2755
  if self._is_ios():
2046
- return {"success": False, "message": "iOS 暂不支持,请使用截图+坐标点击"}
2756
+ return {"success": False, "msg": "iOS暂不支持"}
2047
2757
 
2048
2758
  screen_width = self.client.u2.info.get('displayWidth', 720)
2049
2759
  screen_height = self.client.u2.info.get('displayHeight', 1280)
2050
2760
 
2051
- # 获取原始 XML(使用完整 UI 层级)
2052
- xml_string = self._get_full_hierarchy()
2761
+ # 获取原始 XML
2762
+ xml_string = self.client.u2.dump_hierarchy(compressed=False)
2053
2763
 
2054
2764
  # 关闭按钮的文本特征
2055
2765
  close_texts = ['×', 'X', 'x', '关闭', '取消', 'close', 'Close', 'CLOSE', '跳过', '知道了']
@@ -2063,53 +2773,18 @@ class BasicMobileToolsLite:
2063
2773
  root = ET.fromstring(xml_string)
2064
2774
  all_elements = list(root.iter())
2065
2775
 
2066
- # ===== 第一步:检测弹窗区域 =====
2067
- # 弹窗特征:非全屏、面积较大、通常在屏幕中央的容器
2068
- popup_containers = []
2069
- for idx, elem in enumerate(all_elements):
2070
- bounds_str = elem.attrib.get('bounds', '')
2071
- class_name = elem.attrib.get('class', '')
2072
-
2073
- if not bounds_str:
2074
- continue
2075
-
2076
- match = re.match(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds_str)
2077
- if not match:
2078
- continue
2079
-
2080
- x1, y1, x2, y2 = map(int, match.groups())
2081
- width = x2 - x1
2082
- height = y2 - y1
2083
- area = width * height
2084
- screen_area = screen_width * screen_height
2085
-
2086
- # 弹窗容器特征:
2087
- # 1. 面积在屏幕的 10%-90% 之间(非全屏)
2088
- # 2. 宽度或高度不等于屏幕尺寸
2089
- # 3. 是容器类型(Layout/View/Dialog)
2090
- is_container = any(kw in class_name for kw in ['Layout', 'View', 'Dialog', 'Card', 'Container'])
2091
- area_ratio = area / screen_area
2092
- is_not_fullscreen = (width < screen_width * 0.98 or height < screen_height * 0.98)
2093
- is_reasonable_size = 0.08 < area_ratio < 0.9
2094
-
2095
- # 排除状态栏区域(y1 通常很小)
2096
- is_below_statusbar = y1 > 50
2097
-
2098
- if is_container and is_not_fullscreen and is_reasonable_size and is_below_statusbar:
2099
- popup_containers.append({
2100
- 'bounds': (x1, y1, x2, y2),
2101
- 'bounds_str': bounds_str,
2102
- 'area': area,
2103
- 'area_ratio': area_ratio,
2104
- 'idx': idx, # 元素在 XML 中的顺序(越后越上层)
2105
- 'class': class_name
2106
- })
2776
+ # ===== 第一步:使用严格的置信度检测弹窗区域 =====
2777
+ popup_bounds, popup_confidence = self._detect_popup_with_confidence(
2778
+ root, screen_width, screen_height
2779
+ )
2780
+
2781
+ # 如果置信度不够高,记录但继续尝试查找关闭按钮
2782
+ popup_detected = popup_bounds is not None and popup_confidence >= 0.6
2107
2783
 
2108
- # 选择最可能的弹窗容器(优先选择:XML 顺序靠后 + 面积适中)
2109
- if popup_containers:
2110
- # XML 顺序倒序(后出现的在上层),然后按面积适中程度排序
2111
- popup_containers.sort(key=lambda x: (x['idx'], -abs(x['area_ratio'] - 0.3)), reverse=True)
2112
- popup_bounds = popup_containers[0]['bounds']
2784
+ # 🔴 关键检查:如果没有检测到弹窗区域,直接返回"无弹窗"
2785
+ # 避免误点击普通页面上的"关闭"、"取消"等按钮
2786
+ if not popup_detected:
2787
+ return {"success": True, "popup": False}
2113
2788
 
2114
2789
  # ===== 第二步:在弹窗范围内查找关闭按钮 =====
2115
2790
  for idx, elem in enumerate(all_elements):
@@ -2241,73 +2916,16 @@ class BasicMobileToolsLite:
2241
2916
  'content_desc': content_desc,
2242
2917
  'x_percent': round(rel_x * 100, 1),
2243
2918
  'y_percent': round(rel_y * 100, 1),
2244
- 'in_popup': popup_bounds is not None
2919
+ 'in_popup': popup_detected
2245
2920
  })
2246
2921
 
2247
2922
  except ET.ParseError:
2248
2923
  pass
2249
2924
 
2250
2925
  if not close_candidates:
2251
- # 如果检测到弹窗区域,先尝试点击常见的关闭按钮位置
2252
- if popup_bounds:
2253
- px1, py1, px2, py2 = popup_bounds
2254
- popup_width = px2 - px1
2255
- popup_height = py2 - py1
2256
-
2257
- # 【优化】X按钮有三种常见位置:
2258
- # 1. 弹窗内靠近顶部边界(内嵌X按钮)- 最常见
2259
- # 2. 弹窗边界上方(浮动X按钮)
2260
- # 3. 弹窗正下方(底部关闭按钮)
2261
- offset_x = max(60, int(popup_width * 0.07)) # 宽度7%
2262
- offset_y_above = max(35, int(popup_height * 0.025)) # 高度2.5%,在边界之上
2263
- offset_y_near = max(45, int(popup_height * 0.03)) # 高度3%,紧贴顶边界内侧
2264
-
2265
- try_positions = [
2266
- # 【最高优先级】弹窗内紧贴顶部边界
2267
- (px2 - offset_x, py1 + offset_y_near, "弹窗右上角"),
2268
- # 弹窗边界上方(浮动X按钮)
2269
- (px2 - offset_x, py1 - offset_y_above, "弹窗右上浮"),
2270
- # 弹窗正下方中间(底部关闭按钮)
2271
- ((px1 + px2) // 2, py2 + max(50, int(popup_height * 0.04)), "弹窗下方中间"),
2272
- # 弹窗正上方中间
2273
- ((px1 + px2) // 2, py1 - 40, "弹窗正上方"),
2274
- ]
2275
-
2276
- for try_x, try_y, position_name in try_positions:
2277
- if 0 <= try_x <= screen_width and 0 <= try_y <= screen_height:
2278
- self.client.u2.click(try_x, try_y)
2279
- time.sleep(0.3)
2280
-
2281
- # 尝试后截图,让 AI 判断是否成功
2282
- screenshot_result = self.take_screenshot("尝试关闭后")
2283
- return {
2284
- "success": True,
2285
- "message": f"✅ 已尝试点击常见关闭按钮位置",
2286
- "tried_positions": [p[2] for p in try_positions],
2287
- "screenshot": screenshot_result.get("screenshot_path", ""),
2288
- "tip": "请查看截图确认弹窗是否已关闭。如果还在,可手动分析截图找到关闭按钮位置。"
2289
- }
2290
-
2291
- # 没有检测到弹窗区域,截图让 AI 分析
2292
- screenshot_result = self.take_screenshot(description="页面截图", compress=True)
2293
-
2294
- return {
2295
- "success": False,
2296
- "message": "❌ 未检测到弹窗区域,已截图供 AI 分析",
2297
- "action_required": "请查看截图找到关闭按钮,调用 mobile_click_at_coords 点击",
2298
- "screenshot": screenshot_result.get("screenshot_path", ""),
2299
- "screen_size": {"width": screen_width, "height": screen_height},
2300
- "image_size": {
2301
- "width": screenshot_result.get("image_width", screen_width),
2302
- "height": screenshot_result.get("image_height", screen_height)
2303
- },
2304
- "original_size": {
2305
- "width": screenshot_result.get("original_img_width", screen_width),
2306
- "height": screenshot_result.get("original_img_height", screen_height)
2307
- },
2308
- "search_areas": ["弹窗右上角", "弹窗正上方", "弹窗下方中间", "屏幕右上角"],
2309
- "time_warning": "⚠️ 截图分析期间弹窗可能自动消失。如果是定时弹窗,建议等待其自动消失。"
2310
- }
2926
+ if popup_detected and popup_bounds:
2927
+ return {"success": False, "fallback": "vision", "popup": True}
2928
+ return {"success": True, "popup": False}
2311
2929
 
2312
2930
  # 按得分排序,取最可能的
2313
2931
  close_candidates.sort(key=lambda x: x['score'], reverse=True)
@@ -2317,49 +2935,30 @@ class BasicMobileToolsLite:
2317
2935
  self.client.u2.click(best['center_x'], best['center_y'])
2318
2936
  time.sleep(0.5)
2319
2937
 
2320
- # 点击后截图,让 AI 判断是否成功
2321
- screenshot_result = self.take_screenshot("关闭弹窗后")
2322
-
2323
- # 记录操作(使用百分比,跨设备兼容)
2324
- self._record_operation(
2325
- 'click',
2326
- x=best['center_x'],
2327
- y=best['center_y'],
2328
- x_percent=best['x_percent'],
2329
- y_percent=best['y_percent'],
2330
- screen_width=screen_width,
2331
- screen_height=screen_height,
2332
- ref=f"close_popup_{best['position']}"
2333
- )
2938
+ # 🎯 关键步骤:检查应用是否跳转,如果跳转说明弹窗去除失败,需要返回目标应用
2939
+ app_check = self._check_app_switched()
2940
+ return_result = None
2334
2941
 
2335
- # 返回候选按钮列表,让 AI 看截图判断
2336
- # 如果弹窗还在,AI 可以选择点击其他候选按钮
2337
- return {
2338
- "success": True,
2339
- "message": f"✅ 已点击关闭按钮 ({best['position']}): ({best['center_x']}, {best['center_y']})",
2340
- "clicked": {
2341
- "position": best['position'],
2342
- "match_type": best['match_type'],
2343
- "coords": (best['center_x'], best['center_y']),
2344
- "percent": (best['x_percent'], best['y_percent'])
2345
- },
2346
- "screenshot": screenshot_result.get("screenshot_path", ""),
2347
- "popup_detected": popup_bounds is not None,
2348
- "popup_bounds": f"[{popup_bounds[0]},{popup_bounds[1]}][{popup_bounds[2]},{popup_bounds[3]}]" if popup_bounds else None,
2349
- "other_candidates": [
2350
- {
2351
- "position": c['position'],
2352
- "type": c['match_type'],
2353
- "coords": (c['center_x'], c['center_y']),
2354
- "percent": (c['x_percent'], c['y_percent'])
2355
- }
2356
- for c in close_candidates[1:4] # 返回其他3个候选,AI 可以选择
2357
- ],
2358
- "tip": "请查看截图判断弹窗是否已关闭。如果弹窗还在,可以尝试点击 other_candidates 中的其他位置;如果误点跳转了,请按返回键"
2359
- }
2942
+ if app_check['switched']:
2943
+ # 应用已跳转,说明弹窗去除失败,尝试返回目标应用
2944
+ return_result = self._return_to_target_app()
2945
+
2946
+ # 记录操作
2947
+ self._record_click('percent', f"{best['x_percent']}%,{best['y_percent']}%",
2948
+ best['x_percent'], best['y_percent'],
2949
+ element_desc=f"关闭按钮({best['position']})")
2950
+
2951
+ # Token 优化:精简返回值
2952
+ result = {"success": True, "clicked": True}
2953
+ if app_check['switched']:
2954
+ result["switched"] = True
2955
+ if return_result:
2956
+ result["returned"] = return_result['success']
2957
+
2958
+ return result
2360
2959
 
2361
2960
  except Exception as e:
2362
- return {"success": False, "message": f"❌ 关闭弹窗失败: {e}"}
2961
+ return {"success": False, "msg": str(e)}
2363
2962
 
2364
2963
  def _get_position_name(self, rel_x: float, rel_y: float) -> str:
2365
2964
  """根据相对坐标获取位置名称"""
@@ -2402,6 +3001,308 @@ class BasicMobileToolsLite:
2402
3001
  return 0.8
2403
3002
  else: # 中间区域
2404
3003
  return 0.5
3004
+
3005
def _detect_popup_with_confidence(self, root, screen_width: int, screen_height: int) -> tuple:
    """Find the most popup-like container in a UI hierarchy and score it.

    Every node with a parseable ``bounds`` attribute that is neither
    (almost) full screen, tiny, nor starting in the top 50 px is scored:

    * class name contains Dialog/Popup/Alert/Modal/BottomSheet   +0.50
    * resource-id contains dialog/popup/alert/modal/
      bottom_sheet/overlay/mask                                  +0.40
    * centred horizontally and vertically                        +0.20
      (centred horizontally only                                 +0.10)
    * covers between 15 % and 75 % of the screen                 +0.15
    * located in the later half of the document order            +0.10
    * appears after a near-full-screen FrameLayout/View node     +0.15

    Scores are accumulated as integer hundredths so that the 0.30 / 0.60
    threshold comparisons are exact rather than subject to float rounding.

    Args:
        root: Root ``xml.etree.ElementTree.Element`` of the hierarchy dump.
        screen_width: Screen width in pixels.
        screen_height: Screen height in pixels.

    Returns:
        ``(bounds, confidence)`` with ``bounds`` as ``(x1, y1, x2, y2)`` when
        the best candidate scores at least 0.6; ``(None, confidence)`` when the
        best candidate scores between 0.3 and 0.6; ``(None, 0)`` when there is
        no candidate at all.
    """
    import re

    candidate_min = 30   # minimum score (in hundredths) to be considered at all
    popup_min = 60       # minimum score (in hundredths) to be reported as a popup
    dialog_class_keywords = ('Dialog', 'Popup', 'Alert', 'Modal', 'BottomSheet', 'PopupWindow')
    dialog_id_keywords = ('dialog', 'popup', 'alert', 'modal', 'bottom_sheet', 'overlay', 'mask')
    bounds_pattern = re.compile(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]')

    screen_area = screen_width * screen_height

    # Pass 1: keep the nodes that have usable bounds, remembering their
    # position in document order and the total number of nodes.
    nodes = []
    total_nodes = 0
    for idx, elem in enumerate(root.iter()):
        total_nodes += 1
        match = bounds_pattern.match(elem.attrib.get('bounds', ''))
        if not match:
            continue
        nodes.append((
            idx,
            tuple(map(int, match.groups())),
            elem.attrib.get('class', ''),
            elem.attrib.get('resource-id', ''),
        ))

    if not nodes:
        return None, 0

    # Pass 2: score the nodes and keep the best candidate.
    best_key = None      # (points, idx) of the best candidate so far
    best_bounds = None
    mask_idx = None      # document index of the last mask-like node seen
    for idx, node_bounds, class_name, resource_id in nodes:
        x1, y1, x2, y2 = node_bounds
        width = x2 - x1
        area_ratio = (width * (y2 - y1)) / screen_area if screen_area > 0 else 0

        # A large, nearly full-width FrameLayout/View is recorded as a
        # possible dimming layer.
        # NOTE(review): the top-level window frame presumably matches this
        # test as well, in which case the bonus below is granted on ordinary
        # pages too - consider tightening.
        if (area_ratio > 0.85 and width >= screen_width * 0.95
                and ('FrameLayout' in class_name or 'View' in class_name)):
            mask_idx = idx

        # Skip full-screen nodes, tiny nodes and anything starting in the top 50 px.
        if area_ratio > 0.9 or area_ratio < 0.05 or y1 < 50:
            continue

        points = 0
        if any(keyword in class_name for keyword in dialog_class_keywords):
            points += 50
        id_lower = resource_id.lower()
        if any(keyword in id_lower for keyword in dialog_id_keywords):
            points += 40

        centered_x = abs((x1 + x2) // 2 - screen_width / 2) < screen_width * 0.15
        centered_y = abs((y1 + y2) // 2 - screen_height / 2) < screen_height * 0.25
        if centered_x and centered_y:
            points += 20
        elif centered_x:
            points += 10

        if 0.15 < area_ratio < 0.75:
            points += 15

        # Later in document order. Compared against the total node count,
        # the same numbering that idx comes from.
        if idx > total_nodes * 0.5:
            points += 10

        if mask_idx is not None and idx > mask_idx:
            points += 15

        if points >= candidate_min:
            key = (points, idx)  # ties on score go to the later node
            if best_key is None or key > best_key:
                best_key, best_bounds = key, node_bounds

    if best_key is None:
        return None, 0

    confidence = best_key[0] / 100
    if best_key[0] >= popup_min:
        return best_bounds, confidence
    return None, confidence
3147
+
3148
def start_toast_watch(self) -> Dict:
    """Start watching for Toast messages (Android only).

    Call this BEFORE performing the action that is expected to show a Toast:

    1. call ``mobile_start_toast_watch()`` to start watching,
    2. perform the action (e.g. tap a submit button),
    3. call ``mobile_get_toast()`` to read the Toast text.

    Returns:
        A dict with ``success`` and a human-readable ``message``. On iOS the
        call is rejected; on Android the toast cache is reset and any error
        raised while doing so is reported in ``message``.
    """
    if self._is_ios():
        ok, text = False, "❌ iOS 不支持 Toast 检测,Toast 是 Android 特有功能"
    else:
        try:
            # Drop any cached toast so that only new ones are picked up.
            self.client.u2.toast.reset()
        except Exception as err:
            ok, text = False, f"❌ 开启 Toast 监听失败: {err}"
        else:
            ok, text = True, "✅ Toast 监听已开启,请立即执行操作,然后调用 mobile_get_toast 获取结果"
    return {"success": ok, "message": text}
3179
+
3180
def get_toast(self, timeout: float = 5.0, reset_first: bool = False) -> Dict:
    """Wait for and return a Toast message (Android only).

    Recommended flow:

    1. call ``mobile_start_toast_watch()`` to start watching,
    2. perform the action (e.g. tap a submit button),
    3. call ``mobile_get_toast()`` to read the Toast.

    Alternatively pass ``reset_first=True`` to clear the toast cache here
    before waiting (useful when the Toast is triggered without a further
    action from the caller).

    Args:
        timeout: Seconds to wait for a Toast, default 5.
        reset_first: Clear the cached toast before waiting, default False.

    Returns:
        On iOS or on error: ``{"success": False, "message": ...}``.
        Otherwise ``success`` is True, ``toast_found`` says whether a
        non-empty Toast text was obtained, ``message`` holds that text (or
        ``None``) and ``tip`` carries a short hint.
    """
    if self._is_ios():
        return {
            "success": False,
            "message": "❌ iOS 不支持 Toast 检测,Toast 是 Android 特有功能"
        }

    try:
        toast = self.client.u2.toast
        if reset_first:
            # Forget older toasts so that only an upcoming one is reported.
            toast.reset()

        received = toast.get_message(wait_timeout=timeout, default=None)
        found = bool(received)
        if found:
            hint = "Toast 消息获取成功"
        else:
            hint = f"在 {timeout} 秒内未检测到 Toast。提示:先调用 mobile_start_toast_watch,再执行操作,最后调用此工具"
        return {
            "success": True,
            "toast_found": found,
            "message": received if found else None,
            "tip": hint,
        }
    except Exception as err:
        return {
            "success": False,
            "message": f"❌ 获取 Toast 失败: {err}"
        }
3235
+
3236
def assert_toast(self, expected_text: str, timeout: float = 5.0, contains: bool = True) -> Dict:
    """Wait for a Toast and check its text (Android only).

    The toast cache is not reset here; the recommended flow is to call
    ``mobile_start_toast_watch`` first, then perform the action, then call
    this tool.

    Args:
        expected_text: Text the Toast is expected to show.
        timeout: Seconds to wait for a Toast.
        contains: True for a substring match, False for an exact match.

    Returns:
        A dict whose ``passed`` flag carries the verdict. ``success`` is False
        only on iOS or when an exception occurred. When a Toast was awaited,
        ``expected`` and ``actual`` are included, plus ``match_type`` if a
        Toast was actually received.
    """
    if self._is_ios():
        return {
            "success": False,
            "passed": False,
            "message": "❌ iOS 不支持 Toast 检测"
        }

    try:
        actual = self.client.u2.toast.get_message(
            wait_timeout=timeout,
            default=None
        )

        report = {"success": True, "expected": expected_text, "actual": actual}

        if actual is None:
            # Nothing showed up within the timeout.
            report["passed"] = False
            report["message"] = "❌ 断言失败:未检测到 Toast 消息"
            return report

        if contains:
            verdict, mode = expected_text in actual, "包含"
        else:
            verdict, mode = expected_text == actual, "精确"

        report["passed"] = verdict
        report["match_type"] = mode
        report["message"] = (
            f"✅ Toast 断言通过:'{actual}'"
            if verdict
            else f"❌ Toast 断言失败:期望 '{expected_text}',实际 '{actual}'"
        )
        return report
    except Exception as err:
        return {
            "success": False,
            "passed": False,
            "message": f"❌ Toast 断言异常: {err}"
        }
2405
3306
 
2406
3307
  def assert_text(self, text: str) -> Dict:
2407
3308
  """检查页面是否包含文本(支持精确匹配和包含匹配)"""
@@ -2487,11 +3388,16 @@ class BasicMobileToolsLite:
2487
3388
  f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
2488
3389
  "",
2489
3390
  "定位策略(按优先级):",
2490
- "1. ID 定位 - 最稳定,跨设备兼容",
2491
- "2. 文本定位 - 稳定,跨设备兼容",
3391
+ "1. 文本定位 - 最稳定,跨设备兼容",
3392
+ "2. ID 定位 - 稳定,跨设备兼容",
2492
3393
  "3. 百分比定位 - 跨分辨率兼容(坐标自动转换)",
3394
+ "",
3395
+ "运行方式:",
3396
+ " pytest {filename} -v # 使用 pytest 运行",
3397
+ " python {filename} # 直接运行",
2493
3398
  f'"""',
2494
3399
  "import time",
3400
+ "import pytest",
2495
3401
  "import uiautomator2 as u2",
2496
3402
  "",
2497
3403
  f'PACKAGE_NAME = "{package_name}"',
@@ -2567,22 +3473,52 @@ class BasicMobileToolsLite:
2567
3473
  " return True",
2568
3474
  "",
2569
3475
  "",
2570
- "def test_main():",
2571
- " # 连接设备",
2572
- " d = u2.connect()",
2573
- " d.implicitly_wait(10) # 设置全局等待",
2574
- " ",
2575
- " # 启动应用",
2576
- f" d.app_start(PACKAGE_NAME)",
2577
- " time.sleep(LAUNCH_WAIT) # 等待启动(可调整)",
3476
+ "def swipe_direction(d, direction):",
3477
+ ' """',
3478
+ ' 通用滑动方法(兼容所有 uiautomator2 版本)',
3479
+ ' ',
3480
+ ' Args:',
3481
+ ' d: uiautomator2 设备对象',
3482
+ ' direction: 滑动方向 (up/down/left/right)',
3483
+ ' """',
3484
+ " info = d.info",
3485
+ " width = info.get('displayWidth', 0)",
3486
+ " height = info.get('displayHeight', 0)",
3487
+ " cx, cy = width // 2, height // 2",
2578
3488
  " ",
2579
- " # 尝试关闭启动广告(可选,根据 App 情况调整)",
3489
+ " if direction == 'up':",
3490
+ " d.swipe(cx, int(height * 0.8), cx, int(height * 0.3))",
3491
+ " elif direction == 'down':",
3492
+ " d.swipe(cx, int(height * 0.3), cx, int(height * 0.8))",
3493
+ " elif direction == 'left':",
3494
+ " d.swipe(int(width * 0.8), cy, int(width * 0.2), cy)",
3495
+ " elif direction == 'right':",
3496
+ " d.swipe(int(width * 0.2), cy, int(width * 0.8), cy)",
3497
+ " return True",
3498
+ "",
3499
+ "",
3500
+ "# ========== pytest fixture ==========",
3501
+ "@pytest.fixture(scope='function')",
3502
+ "def device():",
3503
+ ' """pytest fixture: 连接设备并启动应用"""',
3504
+ " d = u2.connect()",
3505
+ " d.implicitly_wait(10)",
3506
+ " d.app_start(PACKAGE_NAME)",
3507
+ " time.sleep(LAUNCH_WAIT)",
2580
3508
  " if CLOSE_AD_ON_LAUNCH:",
2581
3509
  " close_ad_if_exists(d)",
3510
+ " yield d",
3511
+ " # 测试结束后可选择关闭应用",
3512
+ " # d.app_stop(PACKAGE_NAME)",
3513
+ "",
3514
+ "",
3515
+ f"def test_{safe_name}(device):",
3516
+ ' """测试用例主函数"""',
3517
+ " d = device",
2582
3518
  " ",
2583
3519
  ]
2584
3520
 
2585
- # 生成操作代码(跳过启动应用相关操作,因为脚本头部已处理)
3521
+ # 生成操作代码(使用标准记录格式,逻辑更简洁)
2586
3522
  step_num = 0
2587
3523
  for op in self.operation_history:
2588
3524
  action = op.get('action')
@@ -2594,130 +3530,122 @@ class BasicMobileToolsLite:
2594
3530
  step_num += 1
2595
3531
 
2596
3532
  if action == 'click':
2597
- ref = op.get('ref', '')
2598
- element = op.get('element', '')
2599
- has_coords = 'x' in op and 'y' in op
2600
- has_percent = 'x_percent' in op and 'y_percent' in op
2601
-
2602
- # 判断 ref 是否为坐标格式(coords_ 或 coords:)
2603
- is_coords_ref = ref.startswith('coords_') or ref.startswith('coords:')
2604
- is_percent_ref = ref.startswith('percent_')
2605
-
2606
- # 优先级:ID > 文本 > 百分比 > 坐标(兜底)
2607
- if ref and (':id/' in ref or ref.startswith('com.')):
2608
- # 1️⃣ 使用 resource-id(最稳定)
2609
- script_lines.append(f" # 步骤{step_num}: 点击元素 (ID定位,最稳定)")
2610
- script_lines.append(f" safe_click(d, d(resourceId='{ref}'))")
2611
- elif ref and not is_coords_ref and not is_percent_ref and ':' not in ref:
2612
- # 2️⃣ 使用文本(稳定)- 排除 "text:xxx" 等带冒号的格式
2613
- script_lines.append(f" # 步骤{step_num}: 点击文本 '{ref}' (文本定位)")
2614
- script_lines.append(f" safe_click(d, d(text='{ref}'))")
2615
- elif ref and ':' in ref and not is_coords_ref and not is_percent_ref:
2616
- # 2️⃣-b 使用文本(Android 的 text:xxx 或 description:xxx 格式)
2617
- # 提取冒号后面的实际文本值
2618
- actual_text = ref.split(':', 1)[1] if ':' in ref else ref
2619
- script_lines.append(f" # 步骤{step_num}: 点击文本 '{actual_text}' (文本定位)")
2620
- script_lines.append(f" safe_click(d, d(text='{actual_text}'))")
2621
- elif has_percent:
2622
- # 3️⃣ 使用百分比(跨分辨率兼容)
2623
- x_pct = op['x_percent']
2624
- y_pct = op['y_percent']
2625
- desc = f" ({element})" if element else ""
2626
- script_lines.append(f" # 步骤{step_num}: 点击位置{desc} (百分比定位,跨分辨率兼容)")
2627
- script_lines.append(f" click_by_percent(d, {x_pct}, {y_pct}) # 原坐标: ({op.get('x', '?')}, {op.get('y', '?')})")
2628
- elif has_coords:
2629
- # 4️⃣ 坐标兜底(不推荐,仅用于无法获取百分比的情况)
2630
- desc = f" ({element})" if element else ""
2631
- script_lines.append(f" # 步骤{step_num}: 点击坐标{desc} (⚠️ 坐标定位,可能不兼容其他分辨率)")
2632
- script_lines.append(f" d.click({op['x']}, {op['y']})")
3533
+ # 新格式:使用 locator_type 和 locator_value
3534
+ locator_type = op.get('locator_type', '')
3535
+ locator_value = op.get('locator_value', '')
3536
+ locator_attr = op.get('locator_attr', 'text')
3537
+ element_desc = op.get('element_desc', '')
3538
+ x_pct = op.get('x_percent', 0)
3539
+ y_pct = op.get('y_percent', 0)
3540
+
3541
+ # 转义单引号
3542
+ value_escaped = locator_value.replace("'", "\\'") if locator_value else ''
3543
+
3544
+ if locator_type == 'text':
3545
+ # 文本定位(最稳定)
3546
+ script_lines.append(f" # 步骤{step_num}: 点击 '{element_desc}' (文本定位)")
3547
+ if locator_attr == 'description':
3548
+ script_lines.append(f" safe_click(d, d(description='{value_escaped}'))")
3549
+ elif locator_attr == 'descriptionContains':
3550
+ script_lines.append(f" safe_click(d, d(descriptionContains='{value_escaped}'))")
3551
+ elif locator_attr == 'textContains':
3552
+ script_lines.append(f" safe_click(d, d(textContains='{value_escaped}'))")
3553
+ else:
3554
+ script_lines.append(f" safe_click(d, d(text='{value_escaped}'))")
3555
+ elif locator_type == 'id':
3556
+ # ID 定位(稳定)
3557
+ script_lines.append(f" # 步骤{step_num}: 点击 '{element_desc}' (ID定位)")
3558
+ script_lines.append(f" safe_click(d, d(resourceId='{value_escaped}'))")
3559
+ elif locator_type == 'percent':
3560
+ # 百分比定位(跨分辨率兼容)
3561
+ script_lines.append(f" # 步骤{step_num}: 点击 '{element_desc}' (百分比定位)")
3562
+ script_lines.append(f" click_by_percent(d, {x_pct}, {y_pct})")
2633
3563
  else:
2634
- continue # 无效操作,跳过
2635
-
2636
- script_lines.append(" time.sleep(0.5) # 等待响应")
3564
+ # 兼容旧格式
3565
+ ref = op.get('ref', '')
3566
+ if ref:
3567
+ ref_escaped = ref.replace("'", "\\'")
3568
+ script_lines.append(f" # 步骤{step_num}: 点击 '{ref}'")
3569
+ script_lines.append(f" safe_click(d, d(text='{ref_escaped}'))")
3570
+ else:
3571
+ continue
3572
+
3573
+ script_lines.append(" time.sleep(0.5)")
2637
3574
  script_lines.append(" ")
2638
3575
 
2639
3576
  elif action == 'input':
2640
3577
  text = op.get('text', '')
2641
- ref = op.get('ref', '')
2642
- has_coords = 'x' in op and 'y' in op
2643
- has_percent = 'x_percent' in op and 'y_percent' in op
2644
-
2645
- # 判断 ref 是否为坐标格式
2646
- is_coords_ref = ref.startswith('coords_') or ref.startswith('coords:')
2647
-
2648
- # 优先使用 ID,其次百分比,最后坐标
2649
- if ref and not is_coords_ref and (':id/' in ref or ref.startswith('com.')):
2650
- # 完整格式的 resource-id
2651
- script_lines.append(f" # 步骤{step_num}: 输入文本 '{text}' (ID定位)")
2652
- script_lines.append(f" d(resourceId='{ref}').set_text('{text}')")
2653
- elif ref and not is_coords_ref and not has_coords:
2654
- # 简短格式的 resource-id(不包含 com. 或 :id/)
2655
- script_lines.append(f" # 步骤{step_num}: 输入文本 '{text}' (ID定位)")
2656
- script_lines.append(f" d(resourceId='{ref}').set_text('{text}')")
2657
- elif has_percent:
2658
- x_pct = op['x_percent']
2659
- y_pct = op['y_percent']
2660
- script_lines.append(f" # 步骤{step_num}: 点击后输入 (百分比定位)")
3578
+ locator_type = op.get('locator_type', '')
3579
+ locator_value = op.get('locator_value', '')
3580
+ x_pct = op.get('x_percent', 0)
3581
+ y_pct = op.get('y_percent', 0)
3582
+
3583
+ text_escaped = text.replace("'", "\\'")
3584
+ value_escaped = locator_value.replace("'", "\\'") if locator_value else ''
3585
+
3586
+ if locator_type == 'id':
3587
+ script_lines.append(f" # 步骤{step_num}: 输入 '{text}' (ID定位)")
3588
+ script_lines.append(f" d(resourceId='{value_escaped}').set_text('{text_escaped}')")
3589
+ elif locator_type == 'class':
3590
+ script_lines.append(f" # 步骤{step_num}: 输入 '{text}' (类名定位)")
3591
+ script_lines.append(f" d(className='android.widget.EditText').set_text('{text_escaped}')")
3592
+ elif x_pct > 0 and y_pct > 0:
3593
+ script_lines.append(f" # 步骤{step_num}: 点击后输入 '{text}'")
2661
3594
  script_lines.append(f" click_by_percent(d, {x_pct}, {y_pct})")
2662
- script_lines.append(f" time.sleep(0.3)")
2663
- script_lines.append(f" d.send_keys('{text}')")
2664
- elif has_coords:
2665
- script_lines.append(f" # 步骤{step_num}: 点击坐标后输入 (⚠️ 可能不兼容其他分辨率)")
2666
- script_lines.append(f" d.click({op['x']}, {op['y']})")
2667
- script_lines.append(f" time.sleep(0.3)")
2668
- script_lines.append(f" d.send_keys('{text}')")
3595
+ script_lines.append(" time.sleep(0.3)")
3596
+ script_lines.append(f" d.send_keys('{text_escaped}')")
2669
3597
  else:
2670
- # 兜底:无法识别的格式,跳过
2671
- continue
3598
+ # 兼容旧格式
3599
+ ref = op.get('ref', '')
3600
+ if ref:
3601
+ script_lines.append(f" # 步骤{step_num}: 输入 '{text}'")
3602
+ script_lines.append(f" d(resourceId='{ref}').set_text('{text_escaped}')")
3603
+ else:
3604
+ continue
3605
+
2672
3606
  script_lines.append(" time.sleep(0.5)")
2673
3607
  script_lines.append(" ")
2674
3608
 
2675
3609
  elif action == 'long_press':
2676
- ref = op.get('ref', '')
2677
- element = op.get('element', '')
3610
+ locator_type = op.get('locator_type', '')
3611
+ locator_value = op.get('locator_value', '')
3612
+ locator_attr = op.get('locator_attr', 'text')
3613
+ element_desc = op.get('element_desc', '')
2678
3614
  duration = op.get('duration', 1.0)
2679
- has_coords = 'x' in op and 'y' in op
2680
- has_percent = 'x_percent' in op and 'y_percent' in op
2681
-
2682
- # 判断 ref 是否为坐标格式
2683
- is_coords_ref = ref.startswith('coords_') or ref.startswith('coords:')
2684
- is_percent_ref = ref.startswith('percent_')
2685
-
2686
- # 优先级:ID > 文本 > 百分比 > 坐标
2687
- if ref and (':id/' in ref or ref.startswith('com.')):
2688
- # 使用 resource-id
2689
- script_lines.append(f" # 步骤{step_num}: 长按元素 (ID定位,最稳定)")
2690
- script_lines.append(f" d(resourceId='{ref}').long_click(duration={duration})")
2691
- elif ref and not is_coords_ref and not is_percent_ref and ':' not in ref:
2692
- # 使用文本
2693
- script_lines.append(f" # 步骤{step_num}: 长按文本 '{ref}' (文本定位)")
2694
- script_lines.append(f" d(text='{ref}').long_click(duration={duration})")
2695
- elif ref and ':' in ref and not is_coords_ref and not is_percent_ref:
2696
- actual_text = ref.split(':', 1)[1] if ':' in ref else ref
2697
- script_lines.append(f" # 步骤{step_num}: 长按文本 '{actual_text}' (文本定位)")
2698
- script_lines.append(f" d(text='{actual_text}').long_click(duration={duration})")
2699
- elif has_percent:
2700
- # 使用百分比
2701
- x_pct = op['x_percent']
2702
- y_pct = op['y_percent']
2703
- desc = f" ({element})" if element else ""
2704
- script_lines.append(f" # 步骤{step_num}: 长按位置{desc} (百分比定位,跨分辨率兼容)")
2705
- script_lines.append(f" long_press_by_percent(d, {x_pct}, {y_pct}, duration={duration}) # 原坐标: ({op.get('x', '?')}, {op.get('y', '?')})")
2706
- elif has_coords:
2707
- # 坐标兜底
2708
- desc = f" ({element})" if element else ""
2709
- script_lines.append(f" # 步骤{step_num}: 长按坐标{desc} (⚠️ 坐标定位,可能不兼容其他分辨率)")
2710
- script_lines.append(f" d.long_click({op['x']}, {op['y']}, duration={duration})")
3615
+ x_pct = op.get('x_percent', 0)
3616
+ y_pct = op.get('y_percent', 0)
3617
+
3618
+ value_escaped = locator_value.replace("'", "\\'") if locator_value else ''
3619
+
3620
+ if locator_type == 'text':
3621
+ script_lines.append(f" # 步骤{step_num}: 长按 '{element_desc}'")
3622
+ if locator_attr == 'description':
3623
+ script_lines.append(f" d(description='{value_escaped}').long_click(duration={duration})")
3624
+ else:
3625
+ script_lines.append(f" d(text='{value_escaped}').long_click(duration={duration})")
3626
+ elif locator_type == 'id':
3627
+ script_lines.append(f" # 步骤{step_num}: 长按 '{element_desc}'")
3628
+ script_lines.append(f" d(resourceId='{value_escaped}').long_click(duration={duration})")
3629
+ elif locator_type == 'percent':
3630
+ script_lines.append(f" # 步骤{step_num}: 长按 '{element_desc}'")
3631
+ script_lines.append(f" long_press_by_percent(d, {x_pct}, {y_pct}, duration={duration})")
2711
3632
  else:
2712
- continue
2713
-
2714
- script_lines.append(" time.sleep(0.5) # 等待响应")
3633
+ # 兼容旧格式
3634
+ ref = op.get('ref', '')
3635
+ if ref:
3636
+ ref_escaped = ref.replace("'", "\\'")
3637
+ script_lines.append(f" # 步骤{step_num}: 长按 '{ref}'")
3638
+ script_lines.append(f" d(text='{ref_escaped}').long_click(duration={duration})")
3639
+ else:
3640
+ continue
3641
+
3642
+ script_lines.append(" time.sleep(0.5)")
2715
3643
  script_lines.append(" ")
2716
3644
 
2717
3645
  elif action == 'swipe':
2718
3646
  direction = op.get('direction', 'up')
2719
3647
  script_lines.append(f" # 步骤{step_num}: 滑动 {direction}")
2720
- script_lines.append(f" d.swipe_ext('{direction}')")
3648
+ script_lines.append(f" swipe_direction(d, '{direction}')")
2721
3649
  script_lines.append(" time.sleep(0.5)")
2722
3650
  script_lines.append(" ")
2723
3651
 
@@ -2732,8 +3660,16 @@ class BasicMobileToolsLite:
2732
3660
  " print('✅ 测试完成')",
2733
3661
  "",
2734
3662
  "",
3663
+ "# ========== 直接运行入口 ==========",
2735
3664
  "if __name__ == '__main__':",
2736
- " test_main()",
3665
+ " # 直接运行时,手动创建设备连接",
3666
+ " _d = u2.connect()",
3667
+ " _d.implicitly_wait(10)",
3668
+ " _d.app_start(PACKAGE_NAME)",
3669
+ " time.sleep(LAUNCH_WAIT)",
3670
+ " if CLOSE_AD_ON_LAUNCH:",
3671
+ " close_ad_if_exists(_d)",
3672
+ f" test_{safe_name}(_d)",
2737
3673
  ])
2738
3674
 
2739
3675
  script = '\n'.join(script_lines)
@@ -2742,8 +3678,11 @@ class BasicMobileToolsLite:
2742
3678
  output_dir = Path("tests")
2743
3679
  output_dir.mkdir(exist_ok=True)
2744
3680
 
3681
+ # 确保文件名符合 pytest 规范(以 test_ 开头)
2745
3682
  if not filename.endswith('.py'):
2746
3683
  filename = f"{filename}.py"
3684
+ if not filename.startswith('test_'):
3685
+ filename = f"test_{filename}"
2747
3686
 
2748
3687
  file_path = output_dir / filename
2749
3688
  file_path.write_text(script, encoding='utf-8')
@@ -2751,7 +3690,7 @@ class BasicMobileToolsLite:
2751
3690
  return {
2752
3691
  "success": True,
2753
3692
  "file_path": str(file_path),
2754
- "message": f"✅ 脚本已生成: {file_path}",
3693
+ "message": f"✅ 脚本已生成: {file_path}\n💡 运行方式: pytest {file_path} -v 或 python {file_path}",
2755
3694
  "operations_count": len(self.operation_history),
2756
3695
  "preview": script[:500] + "..."
2757
3696
  }
@@ -2920,10 +3859,28 @@ class BasicMobileToolsLite:
2920
3859
  try:
2921
3860
  import xml.etree.ElementTree as ET
2922
3861
 
2923
- # ========== 第1步:控件树查找关闭按钮(使用完整 UI 层级)==========
2924
- xml_string = self._get_full_hierarchy()
3862
+ # ========== 第0步:先检测是否有弹窗 ==========
3863
+ xml_string = self.client.u2.dump_hierarchy(compressed=False)
2925
3864
  root = ET.fromstring(xml_string)
2926
3865
 
3866
+ screen_width = self.client.u2.info.get('displayWidth', 1440)
3867
+ screen_height = self.client.u2.info.get('displayHeight', 3200)
3868
+
3869
+ popup_bounds, popup_confidence = self._detect_popup_with_confidence(
3870
+ root, screen_width, screen_height
3871
+ )
3872
+
3873
+ # 如果没有检测到弹窗,直接返回"无弹窗"
3874
+ if popup_bounds is None or popup_confidence < 0.5:
3875
+ result["success"] = True
3876
+ result["method"] = None
3877
+ result["message"] = "ℹ️ 当前页面未检测到弹窗,无需关闭"
3878
+ result["popup_detected"] = False
3879
+ result["popup_confidence"] = popup_confidence
3880
+ return result
3881
+
3882
+ # ========== 第1步:控件树查找关闭按钮 ==========
3883
+
2927
3884
  # 关闭按钮的常见特征
2928
3885
  close_keywords = ['关闭', '跳过', '×', 'X', 'x', 'close', 'skip', '取消']
2929
3886
  close_content_desc = ['关闭', '跳过', 'close', 'skip', 'dismiss']
@@ -3002,32 +3959,40 @@ class BasicMobileToolsLite:
3002
3959
  cx, cy = best['center']
3003
3960
  bounds = best['bounds']
3004
3961
 
3005
- # 点击前截图(用于自动学习)
3006
- pre_screenshot = None
3007
- if auto_learn:
3008
- pre_result = self.take_screenshot(description="关闭前", compress=False)
3009
- pre_screenshot = pre_result.get("screenshot_path")
3010
-
3011
- # 点击
3012
- self.click_at_coords(cx, cy)
3962
+ # 点击(click_at_coords 内部已包含应用状态检查和自动返回)
3963
+ click_result = self.click_at_coords(cx, cy)
3013
3964
  time.sleep(0.5)
3014
3965
 
3966
+ # 🎯 再次检查应用状态(确保弹窗去除没有导致应用跳转)
3967
+ app_check = self._check_app_switched()
3968
+ return_result = None
3969
+
3970
+ if app_check['switched']:
3971
+ # 应用已跳转,说明弹窗去除失败,尝试返回目标应用
3972
+ return_result = self._return_to_target_app()
3973
+
3015
3974
  result["success"] = True
3016
3975
  result["method"] = "控件树"
3017
- result["message"] = f"✅ 通过控件树找到关闭按钮并点击\n" \
3018
- f" 位置: ({cx}, {cy})\n" \
3019
- f" 原因: {best['reason']}"
3976
+ msg = f"✅ 通过控件树找到关闭按钮并点击\n" \
3977
+ f" 位置: ({cx}, {cy})\n" \
3978
+ f" 原因: {best['reason']}"
3020
3979
 
3021
- # 自动学习:检查这个 X 是否已在模板库,不在就添加
3022
- if auto_learn and pre_screenshot:
3023
- learn_result = self._auto_learn_template(pre_screenshot, bounds)
3024
- if learn_result:
3025
- result["learned_template"] = learn_result
3026
- result["message"] += f"\n📚 自动学习: {learn_result}"
3980
+ if app_check['switched']:
3981
+ msg += f"\n⚠️ 应用已跳转,说明弹窗去除失败"
3982
+ if return_result:
3983
+ if return_result['success']:
3984
+ msg += f"\n{return_result['message']}"
3985
+ else:
3986
+ msg += f"\n❌ 自动返回失败: {return_result['message']}"
3987
+
3988
+ result["message"] = msg
3989
+ result["app_check"] = app_check
3990
+ result["return_to_app"] = return_result
3991
+ result["tip"] = "💡 建议调用 mobile_screenshot_with_som 确认弹窗是否已关闭"
3027
3992
 
3028
3993
  return result
3029
3994
 
3030
- # ========== 第2步:模板匹配 ==========
3995
+ # ========== 第2步:模板匹配(自动执行,不需要 AI 介入)==========
3031
3996
  screenshot_path = None
3032
3997
  try:
3033
3998
  from .template_matcher import TemplateMatcher
@@ -3047,15 +4012,30 @@ class BasicMobileToolsLite:
3047
4012
  y_pct = best["percent"]["y"]
3048
4013
 
3049
4014
  # 点击
3050
- self.click_by_percent(x_pct, y_pct)
4015
+ click_result = self.click_by_percent(x_pct, y_pct)
3051
4016
  time.sleep(0.5)
3052
4017
 
4018
+ app_check = self._check_app_switched()
4019
+ return_result = None
4020
+
4021
+ if app_check['switched']:
4022
+ return_result = self._return_to_target_app()
4023
+
3053
4024
  result["success"] = True
3054
4025
  result["method"] = "模板匹配"
3055
- result["message"] = f"✅ 通过模板匹配找到关闭按钮并点击\n" \
3056
- f" 模板: {best.get('template', 'unknown')}\n" \
3057
- f" 置信度: {best.get('confidence', 'N/A')}%\n" \
3058
- f" 位置: ({x_pct:.1f}%, {y_pct:.1f}%)"
4026
+ msg = f"✅ 通过模板匹配找到关闭按钮并点击\n" \
4027
+ f" 模板: {best.get('template', 'unknown')}\n" \
4028
+ f" 置信度: {best.get('confidence', 'N/A')}%\n" \
4029
+ f" 位置: ({x_pct:.1f}%, {y_pct:.1f}%)"
4030
+
4031
+ if app_check['switched']:
4032
+ msg += f"\n⚠️ 应用已跳转"
4033
+ if return_result:
4034
+ msg += f"\n{return_result['message']}"
4035
+
4036
+ result["message"] = msg
4037
+ result["app_check"] = app_check
4038
+ result["return_to_app"] = return_result
3059
4039
  return result
3060
4040
 
3061
4041
  except ImportError:
@@ -3063,17 +4043,12 @@ class BasicMobileToolsLite:
3063
4043
  except Exception:
3064
4044
  pass # 模板匹配失败,继续下一步
3065
4045
 
3066
- # ========== 第3步:返回截图供 AI 分析 ==========
3067
- if not screenshot_path:
3068
- screenshot_result = self.take_screenshot(description="需要AI分析", compress=True)
3069
-
4046
+ # ========== 第3步:控件树和模板匹配都失败,提示 AI 使用视觉识别 ==========
3070
4047
  result["success"] = False
4048
+ result["fallback"] = "vision"
3071
4049
  result["method"] = None
3072
- result["message"] = "❌ 控件树和模板匹配都未找到关闭按钮\n" \
3073
- "📸 已截图,请 AI 分析图片中的 X 按钮位置\n" \
3074
- "💡 找到后使用 mobile_click_by_percent(x%, y%) 点击"
3075
- result["screenshot"] = screenshot_result if not screenshot_path else {"screenshot_path": screenshot_path}
3076
- result["need_ai_analysis"] = True
4050
+ result["popup_detected"] = True
4051
+ result["message"] = "⚠️ 控件树和模板匹配都未找到关闭按钮,请调用 mobile_screenshot_with_som 截图后用 click_by_som 点击"
3077
4052
 
3078
4053
  return result
3079
4054