mobile-mcp-ai 2.2.6__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. mobile_mcp/config.py +3 -2
  2. mobile_mcp/core/basic_tools_lite.py +3193 -0
  3. mobile_mcp/core/ios_client_wda.py +569 -0
  4. mobile_mcp/core/ios_device_manager_wda.py +306 -0
  5. mobile_mcp/core/mobile_client.py +246 -20
  6. mobile_mcp/core/template_matcher.py +429 -0
  7. mobile_mcp/core/templates/close_buttons/auto_x_0112_151217.png +0 -0
  8. mobile_mcp/core/templates/close_buttons/auto_x_0112_152037.png +0 -0
  9. mobile_mcp/core/templates/close_buttons/auto_x_0112_152840.png +0 -0
  10. mobile_mcp/core/templates/close_buttons/auto_x_0112_153256.png +0 -0
  11. mobile_mcp/core/templates/close_buttons/auto_x_0112_154847.png +0 -0
  12. mobile_mcp/core/templates/close_buttons/gray_x_stock_ad.png +0 -0
  13. mobile_mcp/mcp_tools/__init__.py +10 -0
  14. mobile_mcp/mcp_tools/mcp_server.py +992 -0
  15. mobile_mcp_ai-2.5.3.dist-info/METADATA +456 -0
  16. mobile_mcp_ai-2.5.3.dist-info/RECORD +32 -0
  17. mobile_mcp_ai-2.5.3.dist-info/entry_points.txt +2 -0
  18. mobile_mcp/core/ai/__init__.py +0 -11
  19. mobile_mcp/core/ai/ai_analyzer.py +0 -197
  20. mobile_mcp/core/ai/ai_config.py +0 -116
  21. mobile_mcp/core/ai/ai_platform_adapter.py +0 -399
  22. mobile_mcp/core/ai/smart_test_executor.py +0 -520
  23. mobile_mcp/core/ai/test_generator.py +0 -365
  24. mobile_mcp/core/ai/test_generator_from_history.py +0 -391
  25. mobile_mcp/core/ai/test_generator_standalone.py +0 -293
  26. mobile_mcp/core/assertion/__init__.py +0 -9
  27. mobile_mcp/core/assertion/smart_assertion.py +0 -341
  28. mobile_mcp/core/basic_tools.py +0 -945
  29. mobile_mcp/core/h5/__init__.py +0 -10
  30. mobile_mcp/core/h5/h5_handler.py +0 -548
  31. mobile_mcp/core/ios_client.py +0 -219
  32. mobile_mcp/core/ios_device_manager.py +0 -252
  33. mobile_mcp/core/locator/__init__.py +0 -10
  34. mobile_mcp/core/locator/cursor_ai_auto_analyzer.py +0 -119
  35. mobile_mcp/core/locator/cursor_vision_helper.py +0 -414
  36. mobile_mcp/core/locator/mobile_smart_locator.py +0 -1747
  37. mobile_mcp/core/locator/position_analyzer.py +0 -813
  38. mobile_mcp/core/locator/script_updater.py +0 -157
  39. mobile_mcp/core/nl_test_runner.py +0 -585
  40. mobile_mcp/core/smart_app_launcher.py +0 -421
  41. mobile_mcp/core/smart_tools.py +0 -311
  42. mobile_mcp/mcp/__init__.py +0 -13
  43. mobile_mcp/mcp/mcp_server.py +0 -1126
  44. mobile_mcp/mcp/mcp_server_simple.py +0 -23
  45. mobile_mcp/vision/__init__.py +0 -10
  46. mobile_mcp/vision/vision_locator.py +0 -405
  47. mobile_mcp_ai-2.2.6.dist-info/METADATA +0 -503
  48. mobile_mcp_ai-2.2.6.dist-info/RECORD +0 -49
  49. mobile_mcp_ai-2.2.6.dist-info/entry_points.txt +0 -2
  50. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/WHEEL +0 -0
  51. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/licenses/LICENSE +0 -0
  52. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/top_level.txt +0 -0
@@ -1,1747 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- 移动端SmartLocator - 独立实现(无外部依赖)
5
-
6
- 策略:
7
- 1. Level 1: 规则匹配(免费,85%)- 独立实现
8
- 2. Level 2: 缓存查询(免费,5%)
9
- 3. Level 3: XML深度分析(免费,5%)
10
- 4. Level 4: 视觉识别(付费,4%)
11
- 5. Level 5: 文本AI分析(付费,1%)- 使用 mobile_mcp 自己的 AI 模块
12
-
13
- 注意:此模块已完全解耦,不依赖 browser_mcp
14
- """
15
- import hashlib
16
- import time
17
- from typing import Dict, Optional
18
- import sys
19
- from pathlib import Path as PathLib
20
-
21
- # 独立实现,不再依赖 browser_mcp
22
- SMART_LOCATOR_AVAILABLE = False # 使用自己的规则匹配逻辑
23
-
24
-
25
- class MobileSmartLocator:
26
- """
27
- 移动端SmartLocator适配器
28
-
29
- 复用现有SmartLocator逻辑,适配移动端格式
30
- """
31
-
32
- def __init__(self, mobile_client):
33
- """
34
- 初始化移动端SmartLocator
35
-
36
- Args:
37
- mobile_client: MobileClient实例
38
- """
39
- self.mobile_client = mobile_client
40
-
41
- # 缓存
42
- self._cache: Dict[str, Dict] = {}
43
- self._cache_ttl = 300 # 5分钟
44
-
45
- # 统计
46
- self.stats = {
47
- 'total': 0,
48
- 'rule_hits': 0,
49
- 'cache_hits': 0,
50
- 'quick_match_hits': 0,
51
- 'xml_analysis': 0,
52
- 'vision_calls': 0,
53
- 'ai_calls': 0,
54
- 'xml_read_count': 0, # XML读取次数
55
- 'total_time': 0.0, # 总耗时(毫秒)
56
- }
57
-
58
- # 性能监控
59
- self.performance_logs = [] # 详细性能日志
60
-
61
- # 不再依赖外部 SmartLocator,使用独立实现
62
- self.smart_locator = None
63
-
64
async def locate(self, query: str, wait_for_popup: bool = True, max_wait: float = 3.0) -> Optional[Dict]:
    """
    Locate a UI element from a natural-language description.

    Strategies are tried in a fixed order and the first one that returns a
    truthy result wins: result cache, quick pre-match, rule match, XML
    analysis, position analysis, AI over XML candidates, vision (early when
    the page looks like a popup), text AI, vision as a last resort, and
    finally a Cursor screenshot-analysis request.  The UI hierarchy is
    dumped and parsed exactly once; the parsed elements are shared by all
    XML-based strategies.

    Args:
        query: Natural-language description of the target element.
        wait_for_popup: When True, sleep 0.5 s before dumping the hierarchy
            so that a popup has time to appear.
        max_wait: Documented as the maximum wait in seconds, but this
            method does not read it; kept so existing callers keep working.

    Returns:
        The result dict of the first successful strategy; a dict with
        ``method == 'cursor_vision_pending'`` when a screenshot request
        file was written for Cursor AI; or None when every strategy failed.
    """
    import asyncio
    import datetime
    import json
    import traceback

    start_time = time.time()

    def elapsed_ms() -> float:
        """Milliseconds elapsed since this call started."""
        return (time.time() - start_time) * 1000

    self.stats['total'] += 1
    print(f"\n🔍 MobileSmartLocator 定位: {query}", file=sys.stderr)

    # Cache lookup first: a hit needs no XML read and is not re-cached.
    cache_result = await self._try_cache(query)
    if cache_result:
        self.stats['cache_hits'] += 1
        elapsed_time = elapsed_ms()
        self.stats['total_time'] += elapsed_time
        print(f" ✅ 缓存命中!耗时: {elapsed_time:.2f}ms", file=sys.stderr)
        self._log_performance(query, 'cache', elapsed_time, 0)
        return cache_result

    # Give a popup a moment to render before the hierarchy is captured.
    if wait_for_popup:
        await asyncio.sleep(0.5)

    # Dump and parse the hierarchy once; every later strategy reuses `elements`.
    print(" 📱 读取页面XML...", file=sys.stderr)

    xml_read_start = time.time()
    xml_string = self.mobile_client.u2.dump_hierarchy()
    xml_read_time = (time.time() - xml_read_start) * 1000
    print(f" ⏱️ XML读取: {xml_read_time:.2f}ms", file=sys.stderr)

    xml_parse_start = time.time()
    elements = self.mobile_client.xml_parser.parse(xml_string)
    xml_parse_time = (time.time() - xml_parse_start) * 1000
    print(f" ⏱️ XML解析: {xml_parse_time:.2f}ms (共{len(elements)}个元素)", file=sys.stderr)

    xml_time = xml_read_time + xml_parse_time
    self.stats['xml_read_count'] += 1
    print(f" ✅ XML处理完成,总耗时: {xml_time:.2f}ms (读取: {xml_read_time:.0f}ms + 解析: {xml_parse_time:.0f}ms)", file=sys.stderr)

    async def record_hit(result, stat_key, method, label, show_xml=True):
        """Count, time, print, cache and log a successful strategy; return its result."""
        self.stats[stat_key] = self.stats.get(stat_key, 0) + 1
        elapsed_time = elapsed_ms()
        self.stats['total_time'] += elapsed_time
        xml_suffix = f" (XML: {xml_time:.2f}ms)" if show_xml else ""
        print(f" ✅ {label}总耗时: {elapsed_time:.2f}ms{xml_suffix}", file=sys.stderr)
        await self._cache_result(query, result)
        self._log_performance(query, method, elapsed_time, 1, xml_time)
        return result

    # Quick pre-match for queries that are easy to misread (e.g. a tab
    # whose label contains the word for "input").
    quick_result = await self._try_quick_match(elements, query)
    if quick_result:
        return await record_hit(quick_result, 'quick_match_hits', 'quick_match', "快速预匹配成功!")

    # Built-in rule matching.
    rule_result = await self._try_rule_match(elements, query)
    if rule_result:
        return await record_hit(rule_result, 'rule_hits', 'rule_match', "规则匹配成功!")

    # XML analysis; also yields candidates for the AI fallback below.
    xml_result, candidates = await self._try_xml_analysis(elements, query)
    if xml_result:
        return await record_hit(
            xml_result, 'xml_analysis', 'xml_analysis',
            f"XML分析成功: {xml_result.get('element', '')} ",
        )

    # Position analysis.
    position_result = await self._try_position_analysis(elements, query)
    if position_result:
        return await record_hit(position_result, 'position_analysis', 'position_analysis', "位置分析成功!")

    # Few elements and no candidates is treated as a popup/overlay, where
    # vision is tried before text AI.
    is_popup_scenario = len(elements) < 50 and not candidates

    # AI picks among the candidates that XML analysis could not decide between.
    if candidates:
        print(f" 📋 Level 3.6: AI智能兜底 (有{len(candidates)}个候选元素)...", file=sys.stderr)
        ai_result = await self._try_ai_candidates(query, candidates, elements)
        if ai_result:
            return await record_hit(ai_result, 'ai_calls', 'ai_smart_fallback', "AI智能兜底成功!")

    if is_popup_scenario:
        print(f" 🎯 检测到弹窗场景(XML元素少: {len(elements)}个),优先使用视觉识别...", file=sys.stderr)
        vision_result = await self._try_vision(query)
        if vision_result:
            return await record_hit(vision_result, 'vision_calls', 'vision', "视觉识别成功!", show_xml=False)

    # Text AI over the already-parsed elements (no second XML read).
    print(" ⚠️ XML分析失败,尝试AI分析...", file=sys.stderr)
    ai_result = await self._try_ai_analysis(query, elements)
    if ai_result:
        return await record_hit(ai_result, 'ai_calls', 'ai_analysis', "AI分析成功!", show_xml=False)

    # Vision as the last regular strategy; skipped when the popup branch
    # above has already tried it.
    if not is_popup_scenario:
        print(" ⚠️ AI分析也失败,尝试视觉识别(最后兜底)...", file=sys.stderr)
        vision_result = await self._try_vision(query)
        if vision_result:
            return await record_hit(vision_result, 'vision_calls', 'vision', "视觉识别成功!", show_xml=False)

    # A query with a position keyword has already failed position analysis,
    # so give up here rather than create a Cursor AI request.
    position_keywords = ['右上角', '左上角', '右下角', '左下角', '顶部', '底部', '左侧', '右侧']
    if any(kw in query for kw in position_keywords):
        elapsed_time = elapsed_ms()
        print(f" ❌ 所有定位方法都失败(包含位置关键词,不使用Cursor AI),总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
        return None

    # Final fallback: take a screenshot and write a request file, then
    # return a "pending" marker immediately instead of waiting for analysis.
    try:
        from .cursor_vision_helper import CursorVisionHelper
        cursor_helper = CursorVisionHelper(self.mobile_client)
        region = cursor_helper._smart_region_selection(query)
        screenshot_path = await cursor_helper.take_screenshot(query, region=region)

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        request_id = f"{timestamp}_{hash(query) % 10000}"
        request_file = cursor_helper.request_dir / f"request_{request_id}.json"

        request_data = {
            "request_id": request_id,
            "screenshot_path": screenshot_path,
            "element_desc": query,
            "region": region,
            "timestamp": timestamp,
            "status": "pending"
        }

        with open(request_file, 'w', encoding='utf-8') as f:
            json.dump(request_data, f, ensure_ascii=False, indent=2)

        print(f" 📸 已截图并创建分析请求 (request_id: {request_id})", file=sys.stderr)

        # Special marker dict (not None) so the caller can tell that a
        # Cursor AI analysis is pending.
        return {
            'element': query,
            'ref': f"cursor_vision_request_{request_id}",
            'confidence': 0,
            'method': 'cursor_vision_pending',
            'screenshot_path': screenshot_path,
            'request_id': request_id,
            'status': 'pending_analysis'
        }
    except Exception as e:
        # Best effort: a failure here must not mask the "not found" result.
        print(f" ⚠️ Cursor视觉识别失败: {e}", file=sys.stderr)
        traceback.print_exc()

    elapsed_time = elapsed_ms()
    print(f" ❌ 所有定位方法都失败(包括Cursor视觉识别),总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
    return None
283
-
284
- async def _try_cache(self, query: str) -> Optional[Dict]:
285
- """尝试从缓存获取"""
286
- cache_key = self._get_cache_key(query)
287
- if cache_key in self._cache:
288
- cached = self._cache[cache_key]
289
- if time.time() - cached['timestamp'] < self._cache_ttl:
290
- return cached['result']
291
- else:
292
- # 缓存过期
293
- del self._cache[cache_key]
294
- return None
295
-
296
- async def _try_quick_match(self, elements: list, query: str) -> Optional[Dict]:
297
- """
298
- 快速预匹配(针对容易歧义的查询)
299
-
300
- 场景:
301
- 1. "输入邮箱" - 包含"输入"但实际是页签,不是输入操作
302
- 2. "输入XXX"但不是"输入框" - 可能是页签/按钮,不是输入操作
303
- 3. "登陆" → "登录" - 同义词替换
304
- 4. "点击XX按钮" → "XX" - 去除无意义词
305
- 5. resource-id直接匹配
306
-
307
- 策略:
308
- - 完全匹配优先(准确性第一)
309
- - 去除无意义词再匹配
310
- - 同义词自动替换
311
- """
312
- import time
313
- start_time = time.time()
314
-
315
- query_lower = query.lower().strip()
316
-
317
- # ⚡ 优化1: 同义词替换
318
- if "登陆" in query_lower:
319
- query_lower = query_lower.replace("登陆", "登录")
320
- print(f" ⚡ 同义词替换: '登陆' → '登录'", file=sys.stderr)
321
-
322
- # ⚡ 优化2: resource-id快速匹配(如果query包含:id/或com.开头)
323
- if ":id/" in query or query.startswith("com."):
324
- print(f" ⚡ 检测到resource-id格式,直接匹配", file=sys.stderr)
325
- for elem in elements:
326
- if elem.get('resource_id') == query:
327
- print(f" ✅ resource-id完全匹配: {query}", file=sys.stderr)
328
- return {
329
- 'element': query,
330
- 'ref': query,
331
- 'confidence': 100,
332
- 'method': 'quick_match_resource_id'
333
- }
334
-
335
- # ⚡ 优化3: 去除无意义词,提取关键词
336
- query_clean = query_lower
337
- removed_words = []
338
- if "点击" in query_clean:
339
- query_clean = query_clean.replace("点击", "").strip()
340
- removed_words.append("点击")
341
- if "按钮" in query_clean and "输入框" not in query_clean:
342
- query_clean = query_clean.replace("按钮", "").strip()
343
- removed_words.append("按钮")
344
-
345
- if removed_words:
346
- print(f" ⚡ 去除无意义词: {', '.join(removed_words)} → '{query_clean}'", file=sys.stderr)
347
-
348
- # 判断是否可能被误判为输入操作
349
- has_input_keyword = "输入" in query_lower
350
- is_not_input_box = "输入框" not in query_lower
351
-
352
- # 如果包含"输入"但不是"输入框",可能是页签/按钮(如"输入邮箱"页签)
353
- # 或者去除了无意义词后,都应该在clickable元素中优先查找
354
- if (has_input_keyword and is_not_input_box) or removed_words:
355
- if has_input_keyword and is_not_input_box:
356
- print(f" ⚡ 快速预匹配: 检测到'输入'但不是'输入框',先查找clickable元素", file=sys.stderr)
357
-
358
- # 在clickable元素中查找
359
- filter_start = time.time()
360
- clickable_elements = [e for e in elements if e.get('clickable', False)]
361
- filter_time = (time.time() - filter_start) * 1000
362
- print(f" ⏱️ 预过滤: {filter_time:.2f}ms (从{len(elements)}个筛选到{len(clickable_elements)}个clickable)", file=sys.stderr)
363
-
364
- # ⚡ 优化4: 完全匹配优先(最重要!)
365
- match_start = time.time()
366
- for elem in clickable_elements:
367
- text = elem.get('text', '').lower()
368
- content_desc = elem.get('content_desc', '').lower()
369
- # 清理content_desc(去除换行符和额外文本)
370
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
371
- content_desc_clean_lower = content_desc_clean.lower()
372
-
373
- # 完全匹配优先(使用清理后的query)
374
- if query_clean == content_desc_clean_lower or query_clean == text:
375
- # 找到完全匹配!
376
- match_time = (time.time() - match_start) * 1000
377
- ref = elem.get('resource_id') or content_desc_clean or text
378
- element_desc = content_desc_clean or text or query
379
-
380
- total_time = (time.time() - start_time) * 1000
381
- print(f" ✅ 完全匹配(清理后): {element_desc}", file=sys.stderr)
382
- print(f" ⏱️ 匹配耗时: {match_time:.2f}ms | 快速预匹配总耗时: {total_time:.2f}ms", file=sys.stderr)
383
-
384
- return {
385
- 'element': element_desc,
386
- 'ref': ref,
387
- 'confidence': 95,
388
- 'method': 'quick_match'
389
- }
390
-
391
- # 原始query也试试完全匹配
392
- if query_lower == content_desc_clean_lower or query_lower == text:
393
- match_time = (time.time() - match_start) * 1000
394
- ref = elem.get('resource_id') or content_desc_clean or text
395
- element_desc = content_desc_clean or text or query
396
-
397
- total_time = (time.time() - start_time) * 1000
398
- print(f" ✅ 完全匹配(原始): {element_desc}", file=sys.stderr)
399
- print(f" ⏱️ 匹配耗时: {match_time:.2f}ms | 快速预匹配总耗时: {total_time:.2f}ms", file=sys.stderr)
400
-
401
- return {
402
- 'element': element_desc,
403
- 'ref': ref,
404
- 'confidence': 95,
405
- 'method': 'quick_match'
406
- }
407
-
408
- # 完全匹配失败,再尝试包含匹配(降级)
409
- match_time = (time.time() - match_start) * 1000
410
- print(f" ⏱️ 完全匹配遍历: {match_time:.2f}ms (未找到)", file=sys.stderr)
411
-
412
- contain_start = time.time()
413
- for elem in clickable_elements:
414
- text = elem.get('text', '').lower()
415
- content_desc = elem.get('content_desc', '').lower()
416
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
417
- content_desc_clean_lower = content_desc_clean.lower()
418
-
419
- # 包含匹配(使用清理后的query)
420
- if query_clean in content_desc_clean_lower or query_clean in text:
421
- contain_time = (time.time() - contain_start) * 1000
422
- ref = elem.get('resource_id') or content_desc_clean or text
423
- element_desc = content_desc_clean or text or query
424
-
425
- total_time = (time.time() - start_time) * 1000
426
- print(f" ✅ 包含匹配: {element_desc}", file=sys.stderr)
427
- print(f" ⏱️ 包含匹配耗时: {contain_time:.2f}ms | 快速预匹配总耗时: {total_time:.2f}ms", file=sys.stderr)
428
-
429
- return {
430
- 'element': element_desc,
431
- 'ref': ref,
432
- 'confidence': 85,
433
- 'method': 'quick_match'
434
- }
435
-
436
- total_time = (time.time() - start_time) * 1000
437
- if total_time > 5: # 只有超过5ms才打印
438
- print(f" ⏱️ 快速预匹配: {total_time:.2f}ms (未匹配)", file=sys.stderr)
439
- return None
440
-
441
- async def _try_rule_match(self, elements: list, query: str) -> Optional[Dict]:
442
- """
443
- 移动端规则匹配(独立实现,不依赖 browser_mcp)
444
-
445
- 规则优先级:
446
- 1. 精确文本匹配
447
- 2. 同义词映射匹配
448
- 3. 类型+关键词匹配(如 EditText + 用户名)
449
- 4. 常见 UI 模式匹配
450
-
451
- Args:
452
- elements: 已解析的元素列表
453
- query: 查询文本
454
- """
455
- print(f" 📐 Level 2: 移动端规则匹配...", file=sys.stderr)
456
-
457
- query_lower = query.lower().strip()
458
-
459
- # ==================== 同义词映射 ====================
460
- SYNONYMS = {
461
- '登陆': '登录',
462
- '登入': '登录',
463
- 'signin': '登录',
464
- 'login': '登录',
465
- '注冊': '注册',
466
- 'signup': '注册',
467
- 'register': '注册',
468
- '确认': '确定',
469
- 'ok': '确定',
470
- 'confirm': '确定',
471
- '取消': '取消',
472
- 'cancel': '取消',
473
- '关闭': '关闭',
474
- 'close': '关闭',
475
- '搜索': '搜索',
476
- 'search': '搜索',
477
- '发送': '发送',
478
- 'send': '发送',
479
- '提交': '提交',
480
- 'submit': '提交',
481
- '返回': '返回',
482
- 'back': '返回',
483
- '下一步': '下一步',
484
- 'next': '下一步',
485
- '上一步': '上一步',
486
- 'prev': '上一步',
487
- '完成': '完成',
488
- 'done': '完成',
489
- '保存': '保存',
490
- 'save': '保存',
491
- '删除': '删除',
492
- 'delete': '删除',
493
- '编辑': '编辑',
494
- 'edit': '编辑',
495
- '添加': '添加',
496
- 'add': '添加',
497
- '刷新': '刷新',
498
- 'refresh': '刷新',
499
- }
500
-
501
- # 应用同义词映射
502
- query_normalized = query_lower
503
- for old, new in SYNONYMS.items():
504
- if old in query_normalized:
505
- query_normalized = query_normalized.replace(old, new)
506
- print(f" ⚡ 同义词替换: '{old}' → '{new}'", file=sys.stderr)
507
-
508
- # ==================== 常见按钮规则 ====================
509
- BUTTON_RULES = {
510
- '登录': ['登录', '登 录', 'Login', 'Sign in', '立即登录'],
511
- '注册': ['注册', '注 册', 'Register', 'Sign up', '立即注册'],
512
- '确定': ['确定', '确 定', 'OK', 'Confirm', '好的', '知道了'],
513
- '取消': ['取消', 'Cancel', '算了'],
514
- '关闭': ['关闭', 'Close', '×', 'X'],
515
- '搜索': ['搜索', 'Search', '搜 索'],
516
- '发送': ['发送', 'Send', '发 送'],
517
- '提交': ['提交', 'Submit', '提 交'],
518
- '下一步': ['下一步', 'Next', '继续', '下一步'],
519
- '完成': ['完成', 'Done', 'Finish', '完 成'],
520
- '保存': ['保存', 'Save', '保 存'],
521
- '删除': ['删除', 'Delete', '移除', 'Remove'],
522
- '添加': ['添加', 'Add', '新增', '+ '],
523
- '刷新': ['刷新', 'Refresh', '重新加载'],
524
- '分享': ['分享', 'Share', '分 享'],
525
- '收藏': ['收藏', 'Favorite', '收 藏'],
526
- '点赞': ['点赞', 'Like', '赞', '👍'],
527
- '评论': ['评论', 'Comment', '评 论'],
528
- '设置': ['设置', 'Settings', '设 置'],
529
- '我的': ['我的', 'Mine', 'My', '个人中心'],
530
- '首页': ['首页', 'Home', '主页'],
531
- '消息': ['消息', 'Message', '通知'],
532
- }
533
-
534
- # ==================== 输入框规则 ====================
535
- INPUT_RULES = {
536
- '用户名': ['用户名', '账号', '账户', 'Username', 'Account'],
537
- '密码': ['密码', 'Password', '口令'],
538
- '手机': ['手机', '手机号', '电话', 'Phone', 'Mobile'],
539
- '邮箱': ['邮箱', '邮件', 'Email', 'E-mail'],
540
- '验证码': ['验证码', '验证', 'Code', 'Captcha'],
541
- '搜索': ['搜索', 'Search', '搜一搜'],
542
- '输入': ['输入', '请输入', 'Enter', 'Input'],
543
- }
544
-
545
- # ==================== 规则匹配逻辑 ====================
546
-
547
- # 1. 检查是否是按钮/点击操作
548
- is_click_action = any(kw in query_lower for kw in ['点击', '按', '点', 'click', 'tap', '按钮'])
549
-
550
- # 2. 检查是否是输入操作
551
- is_input_action = any(kw in query_lower for kw in ['输入', '填写', '输入框', 'input', 'type', 'enter'])
552
-
553
- # 3. 尝试按钮规则匹配
554
- for rule_key, rule_texts in BUTTON_RULES.items():
555
- if rule_key in query_normalized:
556
- # 在可点击元素中查找匹配
557
- for elem in elements:
558
- if not (elem.get('clickable') or elem.get('focusable')):
559
- continue
560
-
561
- elem_text = elem.get('text', '').strip()
562
- elem_desc = elem.get('content_desc', '').strip()
563
- elem_id = elem.get('resource_id', '').lower()
564
-
565
- # 检查是否匹配规则文本
566
- for rule_text in rule_texts:
567
- if (rule_text.lower() in elem_text.lower() or
568
- rule_text.lower() in elem_desc.lower() or
569
- rule_key in elem_id):
570
- print(f" ✅ 按钮规则匹配: '{rule_key}' → '{elem_text or elem_desc}'", file=sys.stderr)
571
- return {
572
- 'element': elem_text or elem_desc or rule_key,
573
- 'ref': elem.get('bounds', ''),
574
- 'confidence': 90,
575
- 'method': 'rule_match_button'
576
- }
577
-
578
- # 4. 尝试输入框规则匹配
579
- if is_input_action or '输入框' in query_lower:
580
- for rule_key, rule_texts in INPUT_RULES.items():
581
- if rule_key in query_normalized:
582
- # 在输入框元素中查找
583
- for elem in elements:
584
- class_name = elem.get('class_name', '')
585
- if 'EditText' not in class_name and 'TextField' not in class_name:
586
- continue
587
-
588
- elem_text = elem.get('text', '').strip()
589
- elem_desc = elem.get('content_desc', '').strip()
590
- elem_hint = elem.get('hint', '').strip() # Android 输入框提示
591
- elem_id = elem.get('resource_id', '').lower()
592
-
593
- # 检查是否匹配规则文本
594
- for rule_text in rule_texts:
595
- if (rule_text.lower() in elem_text.lower() or
596
- rule_text.lower() in elem_desc.lower() or
597
- rule_text.lower() in elem_hint.lower() or
598
- rule_key in elem_id):
599
- print(f" ✅ 输入框规则匹配: '{rule_key}' → '{elem_text or elem_desc or elem_hint}'", file=sys.stderr)
600
- return {
601
- 'element': elem_text or elem_desc or elem_hint or rule_key,
602
- 'ref': elem.get('resource_id') or elem.get('bounds', ''),
603
- 'confidence': 90,
604
- 'method': 'rule_match_input'
605
- }
606
-
607
- # 5. 精确文本匹配(去除动作词后匹配)
608
- action_words = ['点击', '按', '点', '输入', '填写', '选择', 'click', 'tap', 'enter', 'input', 'select']
609
- clean_query = query_normalized
610
- for word in action_words:
611
- clean_query = clean_query.replace(word, '').strip()
612
-
613
- if clean_query:
614
- for elem in elements:
615
- if not (elem.get('clickable') or elem.get('focusable') or elem.get('enabled')):
616
- continue
617
-
618
- elem_text = elem.get('text', '').strip().lower()
619
- elem_desc = elem.get('content_desc', '').strip().lower()
620
-
621
- # 精确匹配或包含匹配
622
- if clean_query == elem_text or clean_query == elem_desc:
623
- print(f" ✅ 精确文本匹配: '{clean_query}'", file=sys.stderr)
624
- return {
625
- 'element': elem.get('text') or elem.get('content_desc'),
626
- 'ref': elem.get('bounds', ''),
627
- 'confidence': 95,
628
- 'method': 'rule_match_exact'
629
- }
630
- elif clean_query in elem_text or clean_query in elem_desc:
631
- print(f" ✅ 包含文本匹配: '{clean_query}' in '{elem_text or elem_desc}'", file=sys.stderr)
632
- return {
633
- 'element': elem.get('text') or elem.get('content_desc'),
634
- 'ref': elem.get('bounds', ''),
635
- 'confidence': 85,
636
- 'method': 'rule_match_contains'
637
- }
638
-
639
- print(f" ⚠️ 规则匹配未命中", file=sys.stderr)
640
- return None
641
-
642
- async def _try_xml_analysis(self, elements: list, query: str):
643
- """
644
- XML深度分析
645
-
646
- Args:
647
- elements: 已解析的元素列表(复用,避免重复读取XML)
648
- query: 查询文本
649
-
650
- Returns:
651
- (result, candidates): result为定位结果,candidates为候选元素列表(用于AI兜底)
652
- """
653
- import time
654
- start_time = time.time()
655
-
656
- print(f" 📋 Level 3: XML深度分析...", file=sys.stderr)
657
-
658
- # 🎯 优化:只在调试模式下打印XML结构预览
659
- # 通过环境变量 MOBILE_MCP_DEBUG=1 开启
660
- import os
661
- debug_mode = os.getenv('MOBILE_MCP_DEBUG', '0') == '1'
662
-
663
- if debug_mode:
664
- # 打印XML结构(调试用)
665
- print(f" 📄 XML结构预览(共{len(elements)}个元素):", file=sys.stderr)
666
- print(f" {'─' * 60}", file=sys.stderr)
667
-
668
- # 只打印前20个有意义的元素(避免输出过多)
669
- meaningful_elements = [
670
- e for e in elements
671
- if e.get('text') or e.get('content_desc') or e.get('resource_id') or e.get('clickable')
672
- ][:20]
673
-
674
- for i, elem in enumerate(meaningful_elements, 1):
675
- text = elem.get('text', '')
676
- desc = elem.get('content_desc', '')
677
- resource_id = elem.get('resource_id', '')
678
- class_name = elem.get('class_name', '')
679
- clickable = elem.get('clickable', False)
680
- focusable = elem.get('focusable', False)
681
-
682
- # 格式化输出
683
- parts = []
684
- if text:
685
- parts.append(f"text='{text[:30]}'")
686
- if desc:
687
- desc_clean = desc.split('\n')[0][:30]
688
- parts.append(f"desc='{desc_clean}'")
689
- if resource_id:
690
- parts.append(f"id='{resource_id[:30]}'")
691
- if class_name:
692
- parts.append(f"class={class_name}")
693
- if clickable:
694
- parts.append("[clickable]")
695
- if focusable:
696
- parts.append("[focusable]")
697
-
698
- print(f" {i:2d}. {' | '.join(parts) if parts else 'empty element'}", file=sys.stderr)
699
-
700
- if len(meaningful_elements) < len([e for e in elements if e.get('text') or e.get('content_desc')]):
701
- print(f" ... (还有更多元素,共{len(elements)}个)", file=sys.stderr)
702
- print(f" {'─' * 60}", file=sys.stderr)
703
-
704
- # 文本匹配
705
- query_lower = query.lower().strip()
706
-
707
- # ⚡ 同义词处理:登陆 -> 登录
708
- if "登陆" in query_lower:
709
- query_lower = query_lower.replace("登陆", "登录")
710
- print(f" ⚡ 同义词替换: '登陆' → '登录'", file=sys.stderr)
711
-
712
- matched = []
713
-
714
- # 提取关键词(去除"输入框"、"按钮"等后缀)
715
- query_keywords = query_lower
716
- if "输入框" in query:
717
- query_keywords = query_lower.replace("输入框", "").strip()
718
- elif "按钮" in query:
719
- query_keywords = query_lower.replace("按钮", "").strip()
720
- elif "页签" in query or "标签" in query:
721
- query_keywords = query_lower.replace("页签", "").replace("标签", "").strip()
722
- elif "图标" in query:
723
- query_keywords = query_lower.replace("图标", "").strip()
724
-
725
- # 判断查询类型:输入框 vs 页签/按钮 vs 图标
726
- is_input_query = "输入框" in query or "输入" in query
727
- is_tab_query = "页签" in query or "标签" in query or ("点击" in query and "输入" not in query)
728
- is_icon_query = "图标" in query or ("搜索" in query and "图标" in query) or ("右上角" in query and "图标" in query)
729
-
730
- # 🚀 性能优化策略(准确性优先 + 速度优化)
731
-
732
- # 步骤1: 根据查询类型预过滤元素(大幅减少遍历范围,提速50%+)
733
- filter_start = time.time()
734
- candidate_elements = []
735
-
736
- if is_input_query and "输入框" in query:
737
- # 查询输入框:只看EditText类型(准确性优先)
738
- candidate_elements = [e for e in elements if e.get('class_name', '').lower() in ['edittext', 'textfield']]
739
- filter_time = (time.time() - filter_start) * 1000
740
- if len(candidate_elements) < len(elements):
741
- print(f" 🎯 输入框查询优化: 从{len(elements)}个元素缩减到{len(candidate_elements)}个EditText (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
742
-
743
- # 特殊处理:如果查询输入框,直接匹配所有EditText(包括空的)
744
- # 这样可以匹配到空输入框,后续通过评分选择最佳
745
- match_start = time.time() # 定义match_start
746
- matched = candidate_elements
747
- match_time = (time.time() - match_start) * 1000
748
- print(f" ✅ 找到 {len(matched)} 个EditText元素(包括空输入框) (⏱️ {match_time:.2f}ms)", file=sys.stderr)
749
-
750
- elif is_icon_query:
751
- # 🎯 图标查询优化:优先从顶部区域筛选
752
- # 1. 先筛选可点击的图标元素(Image/ImageView类型,或者无文本的可点击元素)
753
- icon_elements = []
754
- for e in elements:
755
- if not e.get('clickable', False):
756
- continue
757
-
758
- class_name = e.get('class_name', '').lower()
759
- text = e.get('text', '')
760
- content_desc = e.get('content_desc', '')
761
-
762
- # 图标特征:Image类型,或者无文本的可点击元素(可能是图标)
763
- is_image_type = ('image' in class_name or class_name in ['imageview', 'imagebutton'])
764
- is_icon_like = not text and not content_desc # 无文本描述,可能是图标
765
-
766
- if is_image_type or is_icon_like:
767
- icon_elements.append(e)
768
-
769
- # 2. 如果查询包含"右上角"、"顶部"等位置描述,优先筛选顶部区域元素
770
- if "右上角" in query or "顶部" in query or "上角" in query:
771
- # 解析bounds,筛选Y坐标较小的元素(顶部区域)
772
- screen_height = 2400 # 默认屏幕高度,可以从设备获取
773
- top_threshold = screen_height * 0.3 # 顶部30%区域
774
-
775
- top_icon_elements = []
776
- for elem in icon_elements:
777
- bounds = elem.get('bounds', '')
778
- import re
779
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
780
- if match:
781
- y1 = int(match.group(2))
782
- if y1 < top_threshold:
783
- top_icon_elements.append(elem)
784
-
785
- if top_icon_elements:
786
- candidate_elements = top_icon_elements
787
- else:
788
- candidate_elements = icon_elements
789
- else:
790
- candidate_elements = icon_elements
791
-
792
- filter_time = (time.time() - filter_start) * 1000
793
- if "右上角" in query or "顶部" in query or "上角" in query:
794
- print(f" 🎯 图标查询优化(顶部区域): 从{len(elements)}个元素缩减到{len(candidate_elements)}个顶部图标元素 (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
795
- else:
796
- print(f" 🎯 图标查询优化: 从{len(elements)}个元素缩减到{len(candidate_elements)}个图标元素 (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
797
-
798
- # 步骤2: 遍历候选元素进行文本匹配
799
- match_start = time.time()
800
- matched = []
801
- for element in candidate_elements:
802
- text = element.get('text', '').lower()
803
- content_desc = element.get('content_desc', '').lower()
804
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
805
- content_desc_clean_lower = content_desc_clean.lower()
806
- bounds = element.get('bounds', '')
807
-
808
- # 图标匹配:优先匹配description,也匹配text
809
- text_matched = (query_lower == content_desc_clean_lower or # 完全匹配desc
810
- query_lower == text or # 完全匹配text
811
- query_lower in content_desc_clean_lower or # 包含匹配desc
812
- query_lower in text or # 包含匹配text
813
- query_keywords in content_desc_clean_lower or # 关键词匹配desc
814
- query_keywords in text) # 关键词匹配text
815
-
816
- # 🎯 特殊处理:如果图标没有文本描述,根据位置匹配
817
- if not text_matched and not text and not content_desc:
818
- # 无文本图标,根据位置描述匹配
819
- import re
820
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
821
- if match:
822
- x1, y1, x2, y2 = map(int, match.groups())
823
- center_x = (x1 + x2) // 2
824
- center_y = (y1 + y2) // 2
825
- screen_width = 1080
826
- screen_height = 2400
827
-
828
- # 右上角判断:X坐标在右侧70%以上,Y坐标在顶部30%以内
829
- is_top_right = center_x > screen_width * 0.7 and center_y < screen_height * 0.3
830
- # 顶部判断:Y坐标在顶部30%以内
831
- is_top = center_y < screen_height * 0.3
832
- # 右侧判断:X坐标在右侧70%以上
833
- is_right = center_x > screen_width * 0.7
834
-
835
- # 根据查询中的位置关键词匹配
836
- if ("右上角" in query or "上角" in query) and is_top_right:
837
- matched.append(element)
838
- print(f" ✅ 位置匹配(右上角): bounds={bounds}, center=({center_x}, {center_y})", file=sys.stderr)
839
- elif "顶部" in query and is_top:
840
- matched.append(element)
841
- print(f" ✅ 位置匹配(顶部): bounds={bounds}, center=({center_x}, {center_y})", file=sys.stderr)
842
- elif "右侧" in query or "右边" in query and is_right:
843
- matched.append(element)
844
- print(f" ✅ 位置匹配(右侧): bounds={bounds}, center=({center_x}, {center_y})", file=sys.stderr)
845
-
846
- if text_matched:
847
- matched.append(element)
848
-
849
- elif is_tab_query or ("点击" in query and "输入框" not in query):
850
- # 查询页签/按钮:只看可点击元素
851
- clickable_elements = [e for e in elements if e.get('clickable', False)]
852
- filter_time = (time.time() - filter_start) * 1000
853
- if len(clickable_elements) < len(elements):
854
- candidate_elements = clickable_elements
855
- print(f" 🎯 点击查询优化: 从{len(elements)}个元素缩减到{len(candidate_elements)}个可点击元素 (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
856
- else:
857
- candidate_elements = elements
858
- print(f" ⏱️ 预过滤: {filter_time:.2f}ms (无缩减)", file=sys.stderr)
859
-
860
- # 步骤2: 遍历候选元素进行文本匹配
861
- match_start = time.time()
862
- matched = []
863
- for element in candidate_elements:
864
- text = element.get('text', '').lower()
865
- content_desc = element.get('content_desc', '').lower()
866
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
867
- content_desc_clean_lower = content_desc_clean.lower()
868
-
869
- # 匹配条件(简化判断提高速度)
870
- if (query_lower == content_desc_clean_lower or # 完全匹配desc
871
- query_lower == text or # 完全匹配text
872
- query_lower in content_desc_clean_lower or # 包含匹配desc
873
- query_lower in text or # 包含匹配text
874
- query_keywords in content_desc_clean_lower or # 关键词匹配desc
875
- query_keywords in text): # 关键词匹配text
876
- matched.append(element)
877
- else:
878
- # 其他查询:使用全部元素进行文本匹配
879
- candidate_elements = elements
880
- match_start = time.time() # 定义match_start
881
- matched = []
882
- for element in candidate_elements:
883
- text = element.get('text', '').lower()
884
- content_desc = element.get('content_desc', '').lower()
885
- class_name = element.get('class_name', '').lower()
886
-
887
- # 跳过无意义的容器元素
888
- if class_name in ['framelayout', 'linearlayout', 'relativelayout'] and not text and not content_desc:
889
- continue
890
-
891
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
892
- content_desc_clean_lower = content_desc_clean.lower()
893
-
894
- # 匹配条件
895
- if (query_lower == content_desc_clean_lower or
896
- query_lower == text or
897
- query_lower in content_desc_clean_lower or
898
- query_lower in text or
899
- query_keywords in content_desc_clean_lower or
900
- query_keywords in text):
901
- matched.append(element)
902
-
903
- if matched:
904
- match_time = (time.time() - match_start) * 1000
905
- print(f" ✅ 找到 {len(matched)} 个匹配元素 (⏱️ 文本匹配: {match_time:.2f}ms)", file=sys.stderr)
906
- print(f" {'─' * 60}", file=sys.stderr)
907
-
908
- # 显示所有匹配元素(不限制数量,让用户看到完整情况)
909
- for i, elem in enumerate(matched, 1):
910
- text = elem.get('text', '')
911
- desc = elem.get('content_desc', '')
912
- resource_id = elem.get('resource_id', '')
913
- class_name = elem.get('class_name', '')
914
- clickable = elem.get('clickable', False)
915
- focusable = elem.get('focusable', False)
916
- bounds = elem.get('bounds', '')
917
-
918
- # 计算匹配分数(用于显示)
919
- score = 0
920
- content_desc_clean_lower = desc.split('\n')[0].strip().lower() if desc else ''
921
- text_lower = text.lower()
922
-
923
- if query_lower == content_desc_clean_lower:
924
- score += 100
925
- elif query_lower in content_desc_clean_lower:
926
- score += 50
927
- elif query_keywords in content_desc_clean_lower:
928
- score += 48
929
-
930
- if query_lower == text_lower:
931
- score += 80
932
- elif query_lower in text_lower:
933
- score += 40
934
-
935
- if clickable:
936
- score += 20
937
- if focusable:
938
- score += 5
939
- if resource_id:
940
- score += 5
941
-
942
- # 格式化显示
943
- parts = []
944
- if text:
945
- parts.append(f"text='{text}'")
946
- if desc:
947
- desc_clean = desc.split('\n')[0]
948
- parts.append(f"desc='{desc_clean}'")
949
- if resource_id:
950
- parts.append(f"id='{resource_id}'")
951
- if class_name:
952
- parts.append(f"class={class_name}")
953
- if clickable:
954
- parts.append("[clickable]")
955
- if focusable:
956
- parts.append("[focusable]")
957
- if bounds:
958
- parts.append(f"bounds={bounds}")
959
-
960
- # 计算最终分数(在评分循环中会重新计算,这里只是显示)
961
- print(f" [{i:3d}] 分数={score:3d} | {' | '.join(parts) if parts else 'empty element'}", file=sys.stderr)
962
-
963
- print(f" {'─' * 60}", file=sys.stderr)
964
-
965
- # 🎯 Phase 1优化:位置索引定位(仅针对输入框查询)
966
- # 如果是输入框查询,且所有匹配的元素都是EditText且没有任何标识
967
- # 则使用位置索引(关键词)来区分
968
- if is_input_query and "输入框" in query and matched:
969
- # 检查是否所有匹配元素都是EditText且没有text/content_desc/resource_id
970
- all_empty_edittext = all(
971
- e.get('class_name', '').lower() in ['edittext', 'textfield'] and
972
- not e.get('text') and
973
- not e.get('content_desc') and
974
- not e.get('resource_id')
975
- for e in matched
976
- )
977
-
978
- if all_empty_edittext and len(matched) > 1:
979
- # 所有输入框都没有标识,使用位置索引
980
- print(f" 🎯 检测到{len(matched)}个无标识EditText,使用位置索引定位", file=sys.stderr)
981
-
982
- # 按Y坐标排序
983
- import re
984
- def get_y_coord(elem):
985
- bounds = elem.get('bounds', '')
986
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
987
- return int(match.group(2)) if match else 9999
988
-
989
- sorted_edittexts = sorted(matched, key=get_y_coord)
990
-
991
- # 根据关键词选择
992
- target_elem = None
993
- # 🎯 支持"第一个"、"第二个"、"第三个"等描述
994
- if any(kw in query for kw in ['第一个', '第1个', '1个', '首个']):
995
- target_elem = sorted_edittexts[0] if len(sorted_edittexts) > 0 else None
996
- if target_elem:
997
- print(f" → 关键词'第一个' → 第1个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
998
- elif any(kw in query for kw in ['第二个', '第2个', '2个']):
999
- target_elem = sorted_edittexts[1] if len(sorted_edittexts) > 1 else None
1000
- if target_elem:
1001
- print(f" → 关键词'第二个' → 第2个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
1002
- elif any(kw in query for kw in ['第三个', '第3个', '3个']):
1003
- target_elem = sorted_edittexts[2] if len(sorted_edittexts) > 2 else None
1004
- if target_elem:
1005
- print(f" → 关键词'第三个' → 第3个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
1006
- # 原有的关键词匹配
1007
- elif any(kw in query for kw in ['邮箱', '账号', '用户名', '手机号', '电话']):
1008
- target_elem = sorted_edittexts[0]
1009
- print(f" → 关键词'邮箱/账号' → 第1个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
1010
- elif '验证码' in query:
1011
- target_elem = sorted_edittexts[1] if len(sorted_edittexts) > 1 else sorted_edittexts[0]
1012
- print(f" → 关键词'验证码' → 第2个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
1013
- elif '密码' in query:
1014
- target_elem = sorted_edittexts[1] if len(sorted_edittexts) > 1 else sorted_edittexts[0]
1015
- print(f" → 关键词'密码' → 第2个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
1016
-
1017
- if target_elem:
1018
- # 直接返回,使用bounds或class_name[index]作为ref
1019
- ref = target_elem.get('bounds', '')
1020
- if not ref:
1021
- index = sorted_edittexts.index(target_elem)
1022
- ref = f"EditText[{index}]"
1023
-
1024
- print(f" 🎯 位置索引定位成功:", file=sys.stderr)
1025
- print(f" 元素: {query}", file=sys.stderr)
1026
- print(f" ref: '{ref}'", file=sys.stderr)
1027
- print(f" 置信度: 90%", file=sys.stderr)
1028
-
1029
- result = {
1030
- 'element': query,
1031
- 'ref': ref,
1032
- 'confidence': 90,
1033
- 'method': 'position_index'
1034
- }
1035
- return (result, []) # 成功找到,不需要AI兜底
1036
-
1037
- # 🔍 检测超大容器元素(H5页面的容器)
1038
- # 如果是超大容器,使用bounds坐标点击(点击容器底部中心,提交按钮通常在那里)
1039
- filtered_matched = []
1040
- large_container = None
1041
-
1042
- for elem in matched:
1043
- bounds = elem.get('bounds', '')
1044
- if bounds:
1045
- import re
1046
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
1047
- if match:
1048
- x1, y1, x2, y2 = int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4))
1049
- width = x2 - x1
1050
- height = y2 - y1
1051
- # 如果宽度超过屏幕宽度的90%,很可能是H5容器元素
1052
- if width > 1080 * 0.9: # 假设屏幕宽度1080
1053
- print(f" ⚠️ 检测到超大H5容器: width={width}, height={height}", file=sys.stderr)
1054
- print(f" bounds={bounds}", file=sys.stderr)
1055
- # 保存这个容器,如果没有其他元素,就点击容器底部中心
1056
- large_container = elem
1057
- continue
1058
- filtered_matched.append(elem)
1059
-
1060
- # 如果过滤后没有元素了,使用超大容器的bounds坐标点击
1061
- if not filtered_matched and large_container:
1062
- print(f" 🎯 使用H5容器bounds坐标定位(点击底部中心)", file=sys.stderr)
1063
- bounds = large_container.get('bounds', '')
1064
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
1065
- if match:
1066
- x1, y1, x2, y2 = int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4))
1067
- # 计算底部中心点(Y坐标在容器的95%位置,提交按钮通常在最底部)
1068
- center_x = (x1 + x2) // 2
1069
- bottom_y = int(y1 + (y2 - y1) * 0.95) # 95%位置(接近底部)
1070
-
1071
- # 构造一个新的bounds,指向底部中心区域
1072
- # 创建一个小的点击区域(50x50像素)
1073
- click_bounds = f"[{center_x-25},{bottom_y-25}][{center_x+25},{bottom_y+25}]"
1074
-
1075
- print(f" 点击位置: ({center_x}, {bottom_y})", file=sys.stderr)
1076
- print(f" 点击bounds: {click_bounds}", file=sys.stderr)
1077
-
1078
- # 直接返回结果,使用bounds作为ref
1079
- result = {
1080
- 'element': query,
1081
- 'ref': click_bounds, # 使用计算后的点击区域
1082
- 'confidence': 85,
1083
- 'method': 'h5_container_bounds'
1084
- }
1085
- return (result, [])
1086
-
1087
- # 如果过滤后没有元素也没有容器,使用原始列表
1088
- if not filtered_matched:
1089
- print(f" ⚠️ 过滤后无元素,使用原始列表", file=sys.stderr)
1090
- filtered_matched = matched
1091
- elif len(filtered_matched) < len(matched):
1092
- print(f" ✅ 过滤后剩余 {len(filtered_matched)} 个元素(原{len(matched)}个)", file=sys.stderr)
1093
-
1094
- # 为每个匹配元素计算详细分数
1095
- score_start = time.time()
1096
- scored_elements = []
1097
-
1098
- for element in filtered_matched:
1099
- score = 0
1100
- score_details = [] # 记录加分详情(用于调试)
1101
- content_desc = element.get('content_desc', '')
1102
- content_desc_lower = content_desc.lower()
1103
- # 清理content_desc(去除换行符和额外文本)
1104
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
1105
- content_desc_clean_lower = content_desc_clean.lower()
1106
- text = element.get('text', '').lower()
1107
- class_name = element.get('class_name', '').lower()
1108
-
1109
- # 元素类型判断
1110
- is_textbox = class_name in ['edittext', 'textfield']
1111
- is_button = element.get('clickable', False) and not is_textbox
1112
- is_tab = element.get('clickable', False) and ('标签' in content_desc or '标签' in text)
1113
-
1114
- # ===== 类型匹配加分(最重要) =====
1115
- # 如果查询包含"输入框",EditText类型应该获得大幅加分
1116
- # 注意:只有当查询明确包含"输入框"时才加分,避免"输入邮箱"页签被误判
1117
- if is_input_query and "输入框" in query and is_textbox:
1118
- score += 200 # 输入框查询匹配到EditText,大幅加分
1119
- score_details.append("类型匹配EditText+200")
1120
-
1121
- # 额外加分:优先匹配空的输入框(没有text或text是占位符的)
1122
- # 检查text是否为空或只是占位符(如"请输入"、"•••"等)
1123
- is_empty_or_placeholder = (
1124
- not text or
1125
- text.strip() == '' or
1126
- text.strip() == '•••••••••••••••' or # 密码占位符
1127
- '请输入' in text or
1128
- '请填写' in text
1129
- )
1130
-
1131
- if is_empty_or_placeholder:
1132
- score += 100 # 空输入框大幅优先
1133
- score_details.append("空输入框+100")
1134
- else:
1135
- score -= 50 # 已有文本的输入框大幅降分(避免匹配到已填写的输入框)
1136
- score_details.append(f"已有文本({text})-50")
1137
-
1138
- # 如果查询包含"按钮",可点击的按钮应该获得加分
1139
- if "按钮" in query and is_button:
1140
- score += 150 # 按钮查询匹配到按钮元素
1141
- score_details.append("类型匹配Button+150")
1142
-
1143
- # 如果查询包含"页签"或"标签",页签元素应该获得加分
1144
- if is_tab_query and is_tab:
1145
- score += 150 # 页签查询匹配到页签元素
1146
- score_details.append("类型匹配Tab+150")
1147
-
1148
- # ===== 文本匹配评分 =====
1149
- # 优先匹配清理后的content_desc(完全匹配优先)
1150
- if query_lower == content_desc_clean_lower:
1151
- score += 150 # 完全匹配清理后的description(大幅加分)
1152
- score_details.append("完全匹配desc+150")
1153
- elif query_lower == content_desc_lower:
1154
- score += 140 # 完全匹配原始description(可能包含换行)
1155
- score_details.append("完全匹配原始desc+140")
1156
- elif query_lower in content_desc_clean_lower:
1157
- # 如果元素描述比查询长(如"游戏登录"包含"登录"),大幅降分
1158
- if len(content_desc_clean_lower) > len(query_lower):
1159
- score += 5 # 包含匹配但描述更长,大幅降分(避免匹配到"游戏登录")
1160
- score_details.append(f"包含匹配desc但更长({content_desc_clean_lower}包含{query_lower})+5")
1161
- else:
1162
- score += 30 # 包含匹配清理后的description(降分,避免部分匹配)
1163
- score_details.append("包含匹配desc+30")
1164
- elif query_lower in content_desc_lower:
1165
- # 如果元素描述比查询长,大幅降分
1166
- if len(content_desc_lower) > len(query_lower):
1167
- score += 3 # 包含匹配但描述更长,大幅降分
1168
- score_details.append(f"包含匹配原始desc但更长({content_desc_lower}包含{query_lower})+3")
1169
- else:
1170
- score += 25 # 包含匹配原始description(降分)
1171
- score_details.append("包含匹配原始desc+25")
1172
-
1173
- # 完全匹配text优先于部分匹配(重要)
1174
- if query_lower == text:
1175
- score += 80 # 完全匹配text
1176
- score_details.append("完全匹配text+80")
1177
- elif query_lower in text:
1178
- # 如果元素文本比查询长(如"游戏登录"包含"登录"),大幅降分
1179
- if len(text) > len(query_lower):
1180
- score += 5 # 包含匹配但文本更长,大幅降分(避免匹配到"游戏登录")
1181
- score_details.append(f"包含匹配text但更长({text}包含{query_lower})+5")
1182
- else:
1183
- score += 20 # 包含匹配text(降分,避免部分匹配)
1184
- score_details.append("包含匹配text+20")
1185
- elif text and query_lower in text: # 反向匹配(text包含查询)
1186
- score -= 30 # 如果text包含查询但不是完全匹配,大幅降分(避免匹配到已有文本)
1187
- score_details.append("反向匹配text-30")
1188
-
1189
- # 关键词匹配
1190
- if query_keywords == content_desc_clean_lower:
1191
- score += 95
1192
- score_details.append("关键词完全匹配+95")
1193
- elif query_keywords in content_desc_clean_lower:
1194
- score += 48
1195
- score_details.append("关键词包含匹配+48")
1196
-
1197
- # 文本匹配已在上面处理,这里不需要重复
1198
-
1199
- # ===== 元素属性加分 =====
1200
- # 优先选择可交互的元素
1201
- if element.get('clickable'):
1202
- score += 20 # 可点击元素
1203
- score_details.append("clickable+20")
1204
- if element.get('focusable'):
1205
- score += 10 # 可聚焦元素(输入框通常是focusable)
1206
- score_details.append("focusable+10")
1207
-
1208
- # 优先选择有resource-id的元素
1209
- if element.get('resource_id'):
1210
- score += 5
1211
- score_details.append("resource-id+5")
1212
-
1213
- # 页签特征:可点击+有文本/描述
1214
- if is_tab and (text or content_desc):
1215
- score += 15
1216
- score_details.append("Tab特征+15")
1217
-
1218
- # ===== 位置加分(输入框通常在页面上方,按顺序) =====
1219
- if is_input_query and is_textbox:
1220
- bounds = element.get('bounds', '')
1221
- if bounds:
1222
- # 解析bounds,Y坐标小的在上方
1223
- import re
1224
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
1225
- if match:
1226
- y1 = int(match.group(2))
1227
- # Y坐标越小(越靠上),分数越高(最多+50分)
1228
- # 假设屏幕高度2356,Y坐标在200-800之间是输入框常见位置
1229
- if 200 <= y1 <= 800:
1230
- # 对于"邮箱输入框",优先Y坐标更小的(第一个)
1231
- # 对于"密码输入框",优先Y坐标稍大的(第二个)
1232
- if "邮箱" in query:
1233
- # 邮箱输入框应该在第一个(Y坐标更小)
1234
- position_bonus = max(0, 50 - (y1 - 200) // 10)
1235
- score += position_bonus
1236
- score_details.append(f"位置Y={y1}(邮箱优先)+{position_bonus}")
1237
- elif "密码" in query:
1238
- # 密码输入框应该在第二个(Y坐标稍大)
1239
- # 如果Y坐标在400-700之间,给予加分
1240
- if 400 <= y1 <= 700:
1241
- position_bonus = max(0, 50 - abs(y1 - 550) // 10)
1242
- score += position_bonus
1243
- score_details.append(f"位置Y={y1}(密码优先)+{position_bonus}")
1244
- else:
1245
- score -= 20 # 位置不对,降分
1246
- score_details.append(f"位置Y={y1}(密码位置不对)-20")
1247
- else:
1248
- # 其他输入框,Y坐标越小越好
1249
- position_bonus = max(0, 30 - (y1 - 200) // 20)
1250
- score += position_bonus
1251
- score_details.append(f"位置Y={y1}+{position_bonus}")
1252
-
1253
- # 保存分数和详情
1254
- scored_elements.append((element, score, score_details))
1255
-
1256
- # 按分数排序,选择最佳匹配
1257
- scored_elements.sort(key=lambda x: x[1], reverse=True)
1258
- score_time = (time.time() - score_start) * 1000
1259
-
1260
- # 显示前5个的详细评分
1261
- print(f" 📊 评分详情(前5个) (⏱️ 评分: {score_time:.2f}ms):", file=sys.stderr)
1262
- for i, (elem, score, details) in enumerate(scored_elements[:5], 1):
1263
- text = elem.get('text', '')
1264
- desc = elem.get('content_desc', '')
1265
- class_name = elem.get('class_name', '')
1266
- desc_clean = desc.split('\n')[0] if desc else ''
1267
- print(f" [{i}] 分数={score:3d}: {desc_clean or text or class_name}", file=sys.stderr)
1268
- if details:
1269
- print(f" 详情: {' | '.join(details[:3])}", file=sys.stderr) # 只显示前3个加分项
1270
-
1271
- # 选择最佳匹配
1272
- best = scored_elements[0][0] if scored_elements else None
1273
- best_score = scored_elements[0][1] if scored_elements else 0
1274
-
1275
- # 已经在上面排序了,这里不需要再比较
1276
-
1277
- if best:
1278
- # 确定ref(优先resource-id,其次content_desc,最后text)
1279
- ref = best.get('resource_id')
1280
- if not ref:
1281
- # 如果description匹配,使用清理后的description定位(去除换行符)
1282
- content_desc = best.get('content_desc', '')
1283
- if content_desc:
1284
- # 清理content_desc(去除换行符和额外文本)
1285
- content_desc_clean = content_desc.split('\n')[0].strip()
1286
- content_desc_lower = content_desc.lower()
1287
- content_desc_clean_lower = content_desc_clean.lower()
1288
-
1289
- # 如果查询匹配清理后的description,使用清理后的值
1290
- if query_lower in content_desc_clean_lower or query_keywords in content_desc_clean_lower:
1291
- ref = content_desc_clean # 使用清理后的description
1292
- elif query_lower in content_desc_lower:
1293
- ref = content_desc_clean # 即使匹配原始,也使用清理后的
1294
- else:
1295
- ref = content_desc_clean # 默认使用清理后的
1296
- elif best.get('text'):
1297
- # 使用text定位(页签通常用text)
1298
- ref = best.get('text', '')
1299
- else:
1300
- ref = best.get('content_desc', '')
1301
-
1302
- # 确保ref不为空
1303
- if not ref:
1304
- # 如果还是没有ref,尝试使用bounds或class_name+索引
1305
- bounds = best.get('bounds', '')
1306
- class_name = best.get('class_name', '')
1307
-
1308
- if bounds:
1309
- # 使用bounds作为ref(格式:[x1,y1][x2,y2])
1310
- ref = bounds
1311
- print(f" ⚠️ 使用bounds作为ref: {bounds}", file=sys.stderr)
1312
- elif class_name:
1313
- # 使用class_name+索引(作为最后手段)
1314
- # 查找同类元素的索引
1315
- same_class_elements = [e for e in elements if e.get('class_name') == class_name]
1316
- index = same_class_elements.index(best) if best in same_class_elements else 0
1317
- ref = f"{class_name}[{index}]"
1318
- print(f" ⚠️ 使用class_name+索引作为ref: {ref}", file=sys.stderr)
1319
- else:
1320
- print(f" ⚠️ 找到匹配元素但无法确定ref: {best}", file=sys.stderr)
1321
- # 无法确定ref但有匹配元素,返回候选元素供AI分析
1322
- candidates = matched[:5] if matched else []
1323
- return (None, candidates)
1324
-
1325
- # 返回清理后的element描述
1326
- element_desc = best.get('content_desc', '') or best.get('text', '')
1327
- if element_desc and '\n' in element_desc:
1328
- element_desc = element_desc.split('\n')[0].strip()
1329
-
1330
- # 如果没有描述,使用查询文本或class_name
1331
- if not element_desc:
1332
- if query:
1333
- # 使用查询文本作为描述
1334
- element_desc = query
1335
- else:
1336
- element_desc = best.get('class_name', 'element')
1337
-
1338
- total_time = (time.time() - start_time) * 1000
1339
- print(f" 🎯 选择最佳匹配:", file=sys.stderr)
1340
- print(f" 元素: {element_desc}", file=sys.stderr)
1341
- print(f" ref: '{ref}'", file=sys.stderr)
1342
- print(f" 评分: {best_score}", file=sys.stderr)
1343
- print(f" 置信度: {min(95, 70 + best_score // 2)}%", file=sys.stderr)
1344
- print(f" ⏱️ XML深度分析总耗时: {total_time:.2f}ms", file=sys.stderr)
1345
-
1346
- result = {
1347
- 'element': element_desc,
1348
- 'ref': ref,
1349
- 'confidence': min(95, 70 + best_score // 2),
1350
- 'method': 'xml_analysis'
1351
- }
1352
- return (result, []) # 成功找到,不需要AI兜底
1353
-
1354
- # XML分析失败,但返回候选元素供AI分析
1355
- candidates = matched[:5] if matched else [] # 最多返回5个候选
1356
- return (None, candidates)
1357
-
1358
- async def _try_position_analysis(self, elements: list, query: str) -> Optional[Dict]:
1359
- """
1360
- 位置分析(Level 3.5)⭐ 新增
1361
-
1362
- 通过XML中的bounds信息定位无标识元素(如底部导航栏图标)
1363
-
1364
- 适用场景:
1365
- - "底部导航栏第X个图标"
1366
- - "顶部第X个图标"
1367
- - "右下角的按钮"
1368
-
1369
- Args:
1370
- elements: 已解析的元素列表
1371
- query: 查询文本
1372
-
1373
- Returns:
1374
- 定位结果 或 None
1375
- """
1376
- import time
1377
- start_time = time.time()
1378
-
1379
- # 检测是否是位置查询
1380
- position_keywords = [
1381
- '底部导航', '底部第', '底部图标',
1382
- '顶部导航', '顶部第', '顶部图标',
1383
- '右下角', '左下角', '右上角', '左上角',
1384
- '悬浮按钮', '悬浮', '加号', 'fab',
1385
- '第1个', '第2个', '第3个', '第4个', '第5个',
1386
- '第一个', '第二个', '第三个', '第四个', '第五个',
1387
- '最下面', '最上面', '最左边', '最右边',
1388
- '帖子', '按钮', '图标', # 支持通用的第N个描述
1389
- ]
1390
-
1391
- is_position_query = any(kw in query for kw in position_keywords)
1392
-
1393
- if not is_position_query:
1394
- return None
1395
-
1396
- print(f" 📍 Level 3.5: 位置分析...", file=sys.stderr)
1397
-
1398
- try:
1399
- from .position_analyzer import PositionAnalyzer
1400
-
1401
- # 获取屏幕尺寸(从第一个元素推测,或使用默认值)
1402
- screen_width = 1080
1403
- screen_height = 2400
1404
-
1405
- # 尝试从元素中获取屏幕尺寸
1406
- for elem in elements:
1407
- bounds = elem.get('bounds', '')
1408
- if bounds:
1409
- import re
1410
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
1411
- if match:
1412
- x2, y2 = int(match.group(3)), int(match.group(4))
1413
- screen_width = max(screen_width, x2)
1414
- screen_height = max(screen_height, y2)
1415
-
1416
- analyzer = PositionAnalyzer(screen_width, screen_height)
1417
-
1418
- # 根据查询类型选择分析方法(优先级:位置 > 序号)
1419
- result = None
1420
- if '悬浮' in query or '加号' in query or 'fab' in query.lower():
1421
- result = analyzer.analyze_floating_button(elements, query)
1422
- elif '右上角' in query or '上角' in query:
1423
- # 🎯 新增:右上角位置分析
1424
- print(f" 🎯 检测到'右上角'查询,调用 analyze_corner_position", file=sys.stderr)
1425
- result = analyzer.analyze_corner_position(elements, query, corner='top_right')
1426
- elif '左上角' in query:
1427
- result = analyzer.analyze_corner_position(elements, query, corner='top_left')
1428
- elif '右下角' in query:
1429
- result = analyzer.analyze_corner_position(elements, query, corner='bottom_right')
1430
- elif '左下角' in query:
1431
- result = analyzer.analyze_corner_position(elements, query, corner='bottom_left')
1432
- elif ('底部' in query and ('导航' in query or '图标' in query)) or ('底部' in query and any(kw in query for kw in ['第一个', '第二个', '第三个', '第四个', '第五个', '第1个', '第2个', '第3个', '第4个', '第5个'])):
1433
- # 🎯 修复:优先匹配"底部第X个图标"这种描述
1434
- print(f" 🎯 检测到'底部第X个'查询,调用 analyze_bottom_navigation", file=sys.stderr)
1435
- result = analyzer.analyze_bottom_navigation(elements, query)
1436
- elif ('顶部' in query and ('导航' in query or '图标' in query)) or ('顶部' in query and any(kw in query for kw in ['第一个', '第二个', '第三个', '第四个', '第五个', '第1个', '第2个', '第3个', '第4个', '第5个'])):
1437
- # 🎯 修复:优先匹配"顶部第X个图标"这种描述
1438
- print(f" 🎯 检测到'顶部第X个'查询,调用 analyze_top_navigation", file=sys.stderr)
1439
- result = analyzer.analyze_top_navigation(elements, query)
1440
- elif any(kw in query for kw in ['第一个', '第二个', '第三个', '第四个', '第五个', '第1个', '第2个', '第3个', '第4个', '第5个']):
1441
- # 通用的"第N个"定位(没有位置限定)
1442
- print(f" 🎯 检测到'第N个'查询,调用 analyze_nth_element", file=sys.stderr)
1443
- result = analyzer.analyze_nth_element(elements, query)
1444
- else:
1445
- # 其他位置查询(暂不支持)
1446
- print(f" ⚠️ 未匹配到任何位置分析方法", file=sys.stderr)
1447
- result = None
1448
-
1449
- if result:
1450
- elapsed = (time.time() - start_time) * 1000
1451
- print(f" ⏱️ 位置分析耗时: {elapsed:.2f}ms", file=sys.stderr)
1452
- return result
1453
-
1454
- except ImportError:
1455
- print(f" ⚠️ 位置分析器未安装", file=sys.stderr)
1456
- except Exception as e:
1457
- print(f" ⚠️ 位置分析失败: {e}", file=sys.stderr)
1458
-
1459
- return None
1460
-
1461
- async def _try_ai_candidates(self, query: str, candidates: list, all_elements: list) -> Optional[Dict]:
1462
- """
1463
- AI智能兜底 - 分析候选元素
1464
-
1465
- Args:
1466
- query: 用户查询
1467
- candidates: 候选元素列表
1468
- all_elements: 所有元素(用于构建上下文)
1469
- """
1470
- if not candidates:
1471
- return None
1472
-
1473
- try:
1474
- from ..ai.ai_analyzer import ai_analyzer
1475
-
1476
- # 构建上下文信息
1477
- context = f"页面共有{len(all_elements)}个元素,已筛选出{len(candidates)}个候选"
1478
-
1479
- # 调用AI分析
1480
- result = await ai_analyzer.analyze_candidates(query, candidates, context)
1481
- return result
1482
-
1483
- except ImportError:
1484
- print(f" ⚠️ AI分析器未配置", file=sys.stderr)
1485
- return None
1486
- except Exception as e:
1487
- print(f" ⚠️ AI智能兜底失败: {e}", file=sys.stderr)
1488
- return None
1489
-
1490
- async def _try_vision(self, query: str) -> Optional[Dict]:
1491
- """尝试视觉识别(多模态)"""
1492
- print(f" 👁️ Level 4: 尝试视觉识别...", file=sys.stderr)
1493
- try:
1494
- from ...vision.vision_locator import MobileVisionLocator
1495
-
1496
- vision_locator = MobileVisionLocator(self.mobile_client)
1497
- result = await vision_locator.locate_element_by_vision(query)
1498
-
1499
- if result and result.get('found'):
1500
- # 视觉识别返回的是坐标点,直接用于点击
1501
- x = result.get('x', 0)
1502
- y = result.get('y', 0)
1503
- confidence = result.get('confidence', 80)
1504
- print(f" ✅ 视觉识别成功: 坐标({x}, {y}), 置信度{confidence}%", file=sys.stderr)
1505
- return {
1506
- 'element': query,
1507
- 'ref': f"vision_coord_{x}_{y}", # 特殊标记,表示是坐标定位
1508
- 'confidence': confidence,
1509
- 'method': 'vision',
1510
- 'x': x,
1511
- 'y': y,
1512
- }
1513
- else:
1514
- reason = result.get('reason', 'unknown') if result else 'result is None'
1515
- print(f" ❌ 视觉识别未找到元素: {reason}", file=sys.stderr)
1516
- except ImportError:
1517
- print(" ⚠️ 视觉识别模块未安装(需要安装dashscope: pip install dashscope)", file=sys.stderr)
1518
- except Exception as e:
1519
- print(f" ❌ 视觉识别异常: {e}", file=sys.stderr)
1520
- import traceback
1521
- traceback.print_exc()
1522
-
1523
- return None
1524
-
1525
- async def _try_ai_analysis(self, query: str, elements: list = None) -> Optional[Dict]:
1526
- """
1527
- 尝试文本AI分析(最后手段)- 使用 mobile_mcp 独立的 AI 模块
1528
-
1529
- 注意:此方法已解耦,不再依赖 browser_mcp,使用 mobile_mcp/core/ai/ai_analyzer.py
1530
- """
1531
- print(f" 🤖 Level 4: 尝试AI分析...", file=sys.stderr)
1532
-
1533
- try:
1534
- # 导入 mobile_mcp 自己的 AI 模块
1535
- from ..ai.ai_analyzer import ai_analyzer
1536
-
1537
- # 检查 AI 是否配置
1538
- if not ai_analyzer.config.is_configured():
1539
- print(f" ⚠️ AI未配置,跳过AI分析。请配置环境变量: AI_API_KEY", file=sys.stderr)
1540
- return None
1541
-
1542
- # 如果没有传入 elements,先获取
1543
- if elements is None:
1544
- xml_string = self.mobile_client.u2.dump_hierarchy()
1545
- elements = self.mobile_client.xml_parser.parse(xml_string)
1546
-
1547
- # 过滤出可交互元素作为候选
1548
- candidates = []
1549
- for elem in elements:
1550
- # 优先添加可点击元素和输入框
1551
- if elem.get('clickable') or elem.get('focusable') or \
1552
- elem.get('class_name', '').endswith(('Button', 'EditText', 'TextView', 'ImageView')):
1553
- # 必须有文本、描述或resource_id才能识别
1554
- if elem.get('text') or elem.get('content_desc') or elem.get('resource_id'):
1555
- candidates.append(elem)
1556
-
1557
- if not candidates:
1558
- print(f" ⚠️ 未找到可交互候选元素", file=sys.stderr)
1559
- return None
1560
-
1561
- # 限制候选数量,避免 token 过多
1562
- max_candidates = 20
1563
- if len(candidates) > max_candidates:
1564
- print(f" 📋 候选元素过多({len(candidates)}),筛选前{max_candidates}个", file=sys.stderr)
1565
- candidates = candidates[:max_candidates]
1566
-
1567
- print(f" 🤖 使用AI分析 (模型: {ai_analyzer.config.model}),候选元素: {len(candidates)}个", file=sys.stderr)
1568
-
1569
- # 调用 AI 分析
1570
- result = await ai_analyzer.analyze_candidates(query, candidates)
1571
-
1572
- if result:
1573
- print(f" ✅ AI分析成功: {result.get('element', '')} (置信度: {result.get('confidence', 0)}%)", file=sys.stderr)
1574
- return result
1575
- else:
1576
- print(f" ❌ AI分析未找到匹配元素", file=sys.stderr)
1577
- return None
1578
-
1579
- except ImportError as e:
1580
- print(f" ⚠️ 无法导入AI模块: {e}", file=sys.stderr)
1581
- return None
1582
- except Exception as e:
1583
- print(f" ⚠️ AI分析异常: {e}", file=sys.stderr)
1584
- import traceback
1585
- traceback.print_exc()
1586
- return None
1587
-
1588
- def _convert_result(self, result: Dict, query: str = "", elements: list = None) -> Dict:
1589
- """
1590
- 转换结果为移动端格式
1591
-
1592
- SmartLocator返回的ref可能是:
1593
- 1. CSS选择器(如 "button.login-btn")- 需要重新定位
1594
- 2. resource-id(如 "com.app:id/login")- 直接使用
1595
- 3. text(如 "登录")- 直接使用
1596
- 4. bounds(如 "[100,200][300,400]")- 直接使用
1597
-
1598
- Args:
1599
- result: SmartLocator返回的结果
1600
- query: 查询文本
1601
- elements: 已解析的元素列表(可选,避免重复读取XML)
1602
- """
1603
- ref = result.get('ref', '')
1604
- element = result.get('element', '')
1605
-
1606
- print(f" 🔄 转换AI结果: ref='{ref}', element='{element}', query='{query}'", file=sys.stderr)
1607
-
1608
- # 🎯 优化:检测 XPath 格式(AI 常返回这种格式)
1609
- is_xpath = ref.startswith('//') or ref.startswith('//*[@')
1610
- if is_xpath:
1611
- print(f" ⚠️ 检测到XPath格式,需要重新定位: {ref}", file=sys.stderr)
1612
-
1613
- # 如果ref是CSS选择器、HTML标签或XPath格式,需要重新定位
1614
- # 这种情况下,使用query或element文本重新在XML中查找
1615
- html_tags = ['input', 'button', 'textbox', 'submit', 'textarea', 'select', 'a', 'div', 'span']
1616
- if is_xpath or '.' in ref or '#' in ref or ref.startswith('button') or ref.startswith('textbox') or ref.lower() in html_tags:
1617
- print(f" 🔍 检测到HTML标签/CSS选择器,重新定位...", file=sys.stderr)
1618
- # CSS选择器格式,需要重新定位
1619
- # 使用query或element文本在XML中查找
1620
-
1621
- # ⚡ 优化:如果传入了elements,直接使用;否则才读取XML
1622
- if elements is None:
1623
- xml_string = self.mobile_client.u2.dump_hierarchy()
1624
- elements = self.mobile_client.xml_parser.parse(xml_string)
1625
-
1626
- # 优先使用query,其次使用element
1627
- search_text = (query or element).lower()
1628
-
1629
- # 🔍 只在可点击元素中查找
1630
- clickable_elements = [e for e in elements if e.get('clickable') or e.get('class_name') in ['Button', 'ImageButton', 'EditText']]
1631
- print(f" 🔍 在{len(clickable_elements)}个可点击元素中查找 '{search_text}'", file=sys.stderr)
1632
-
1633
- for elem in clickable_elements:
1634
- elem_text = elem.get('text', '').lower()
1635
- elem_desc = elem.get('content_desc', '').lower()
1636
- elem_resource_id = elem.get('resource_id', '').lower()
1637
-
1638
- # 精确匹配(text或description完全包含查询文本)
1639
- # 🎯 改进:支持模糊匹配(忽略空格、括号等)
1640
- search_text_normalized = search_text.replace(' ', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
1641
- elem_text_normalized = elem_text.replace(' ', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
1642
- elem_desc_normalized = elem_desc.replace(' ', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
1643
-
1644
- if search_text and (
1645
- (elem_text and search_text in elem_text) or
1646
- (elem_desc and search_text in elem_desc) or
1647
- (elem_text_normalized and search_text_normalized in elem_text_normalized) or
1648
- (elem_desc_normalized and search_text_normalized in elem_desc_normalized)
1649
- ):
1650
- # 找到匹配,优先使用text/description(更可靠),其次使用resource-id
1651
- new_ref = elem.get('text') or elem.get('content_desc') or elem.get('resource_id', '')
1652
- if new_ref:
1653
- print(f" ✅ 找到匹配元素: {new_ref}", file=sys.stderr)
1654
- result['ref'] = new_ref
1655
- result['method'] = 'rule_match_converted'
1656
- return result
1657
-
1658
- # 如果找不到,尝试使用element文本(去除"按钮"等后缀)
1659
- if element:
1660
- element_clean = element.replace('按钮', '').replace('输入框', '').strip().lower()
1661
- print(f" 🔍 尝试使用清洗后的element: '{element_clean}'", file=sys.stderr)
1662
- for elem in elements:
1663
- elem_text = elem.get('text', '').lower()
1664
- elem_desc = elem.get('content_desc', '').lower()
1665
- if element_clean in elem_text or elem_text in element_clean or element_clean in elem_desc or elem_desc in element_clean:
1666
- new_ref = elem.get('resource_id') or elem.get('text') or elem.get('content_desc', '')
1667
- if new_ref:
1668
- print(f" ✅ 找到匹配元素: {new_ref}", file=sys.stderr)
1669
- result['ref'] = new_ref
1670
- result['method'] = 'rule_match_converted'
1671
- return result
1672
-
1673
- print(f" ❌ 转换失败,未找到匹配元素", file=sys.stderr)
1674
- return None # 转换失败返回None,而不是返回原result
1675
-
1676
- # 其他格式(resource-id、text、bounds)直接返回
1677
- return result
1678
-
1679
def _get_cache_key(self, query: str) -> str:
    """Build the cache key for a locator query.

    The key joins two 8-character MD5 hex prefixes with an underscore:
    the first is taken over ``str()`` of the client's ``_snapshot_cache``
    (an empty string when that value is falsy), the second over the
    query text.

    Args:
        query: Query text to derive the key from.

    Returns:
        A string of the form ``"<snapshot_hash>_<query_hash>"``.
    """
    # Page-structure fingerprint first, then the query fingerprint.
    snapshot_text = str(self.mobile_client._snapshot_cache or '')
    prefixes = [
        hashlib.md5(part.encode()).hexdigest()[:8]
        for part in (snapshot_text, query)
    ]
    return "_".join(prefixes)
1689
-
1690
async def _cache_result(self, query: str, result: Dict):
    """Store a locator result in the in-memory cache.

    The entry is filed under the key produced by ``_get_cache_key`` and
    holds the result together with the ``time.time()`` value taken at the
    moment of storing.

    Args:
        query: Query text the result belongs to.
        result: Locator result to remember.
    """
    key = self._get_cache_key(query)
    entry = dict(result=result, timestamp=time.time())
    self._cache[key] = entry
1697
-
1698
def _log_performance(self, query: str, method: str, total_time: float, xml_count: int, xml_time: float = 0):
    """Append one performance record to ``self.performance_logs``.

    Args:
        query: Query text.
        method: Name of the matching method that was used.
        total_time: Total elapsed time, in milliseconds.
        xml_count: Number of XML reads.
        xml_time: Time spent reading XML, in milliseconds.
    """
    field_names = ('query', 'method', 'total_time', 'xml_count', 'xml_time')
    field_values = (query, method, total_time, xml_count, xml_time)
    record = dict(zip(field_names, field_values))
    self.performance_logs.append(record)
1716
-
1717
def print_performance_report(self):
    """Write the performance report to stderr.

    The report has three parts: overall totals read from ``self.stats``,
    the share of each matching method relative to the total number of
    locate calls, and, when ``self.performance_logs`` is non-empty, one
    table row per logged call. Ratios divide by ``max(1, total)`` so an
    empty run does not divide by zero.
    """
    def emit(line):
        print(line, file=sys.stderr)

    rule = "=" * 80

    emit("\n" + rule)
    emit("📊 性能监控报告")
    emit(rule)

    # Overall totals.
    emit("\n📈 总体统计:")
    emit(f" 总定位次数: {self.stats['total']}")
    emit(f" 总耗时: {self.stats['total_time']:.2f}ms")
    average = self.stats['total_time'] / max(1, self.stats['total'])
    emit(f" 平均耗时: {average:.2f}ms")
    emit(f" XML总读取次数: {self.stats['xml_read_count']}")

    # Distribution of matching methods: (label, stats key, required, suffix).
    # Only 'position_analysis' is optional and falls back to 0.
    emit("\n🎯 匹配方式分布:")
    distribution = (
        ("缓存命中", 'cache_hits', True, ""),
        ("快速预匹配", 'quick_match_hits', True, ""),
        ("规则匹配", 'rule_hits', True, ""),
        ("XML深度分析", 'xml_analysis', True, ""),
        ("位置分析", 'position_analysis', False, " ⭐"),
        ("视觉识别", 'vision_calls', True, ""),
        ("AI分析", 'ai_calls', True, ""),
    )
    for label, key, required, suffix in distribution:
        if required:
            hits = self.stats[key]
        else:
            hits = self.stats.get(key, 0)
        share = hits / max(1, self.stats['total']) * 100
        emit(f" {label}: {hits} ({share:.1f}%){suffix}")

    # Per-call table, only when something was logged.
    if self.performance_logs:
        emit("\n📋 详细性能日志:")
        emit(f"{'序号':<6}{'查询':<25}{'方法':<15}{'总耗时(ms)':<12}{'XML次数':<10}{'XML耗时(ms)':<12}")
        emit("-" * 80)
        for index, entry in enumerate(self.performance_logs, 1):
            shown_query = entry['query']
            if len(shown_query) > 22:
                # Long queries are cut to 22 characters plus an ellipsis.
                shown_query = shown_query[:22] + '...'
            emit(f"{index:<6}{shown_query:<25}{entry['method']:<15}{entry['total_time']:<12.2f}{entry['xml_count']:<10}{entry['xml_time']:<12.2f}")

    emit("\n" + rule)
1747
-