mobile-mcp-ai 2.1.2__py3-none-any.whl → 2.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. mobile_mcp/__init__.py +34 -0
  2. mobile_mcp/config.py +142 -0
  3. mobile_mcp/core/basic_tools_lite.py +3266 -0
  4. {core → mobile_mcp/core}/device_manager.py +2 -2
  5. mobile_mcp/core/dynamic_config.py +272 -0
  6. mobile_mcp/core/ios_client_wda.py +569 -0
  7. mobile_mcp/core/ios_device_manager_wda.py +306 -0
  8. {core → mobile_mcp/core}/mobile_client.py +279 -39
  9. mobile_mcp/core/template_matcher.py +429 -0
  10. mobile_mcp/core/templates/close_buttons/auto_x_0112_151217.png +0 -0
  11. mobile_mcp/core/templates/close_buttons/auto_x_0112_152037.png +0 -0
  12. mobile_mcp/core/templates/close_buttons/auto_x_0112_152840.png +0 -0
  13. mobile_mcp/core/templates/close_buttons/auto_x_0112_153256.png +0 -0
  14. mobile_mcp/core/templates/close_buttons/auto_x_0112_154847.png +0 -0
  15. mobile_mcp/core/templates/close_buttons/gray_x_stock_ad.png +0 -0
  16. {core → mobile_mcp/core}/utils/smart_wait.py +3 -3
  17. mobile_mcp/mcp_tools/__init__.py +10 -0
  18. mobile_mcp/mcp_tools/mcp_server.py +1071 -0
  19. mobile_mcp_ai-2.5.8.dist-info/METADATA +469 -0
  20. mobile_mcp_ai-2.5.8.dist-info/RECORD +32 -0
  21. mobile_mcp_ai-2.5.8.dist-info/entry_points.txt +2 -0
  22. mobile_mcp_ai-2.5.8.dist-info/licenses/LICENSE +201 -0
  23. mobile_mcp_ai-2.5.8.dist-info/top_level.txt +1 -0
  24. core/ai/__init__.py +0 -11
  25. core/ai/ai_analyzer.py +0 -197
  26. core/ai/ai_config.py +0 -116
  27. core/ai/ai_platform_adapter.py +0 -399
  28. core/ai/smart_test_executor.py +0 -520
  29. core/ai/test_generator.py +0 -365
  30. core/ai/test_generator_from_history.py +0 -391
  31. core/ai/test_generator_standalone.py +0 -293
  32. core/assertion/__init__.py +0 -9
  33. core/assertion/smart_assertion.py +0 -341
  34. core/basic_tools.py +0 -377
  35. core/h5/__init__.py +0 -10
  36. core/h5/h5_handler.py +0 -548
  37. core/ios_client.py +0 -219
  38. core/ios_device_manager.py +0 -252
  39. core/locator/__init__.py +0 -10
  40. core/locator/cursor_ai_auto_analyzer.py +0 -119
  41. core/locator/cursor_vision_helper.py +0 -414
  42. core/locator/mobile_smart_locator.py +0 -1640
  43. core/locator/position_analyzer.py +0 -813
  44. core/locator/script_updater.py +0 -157
  45. core/nl_test_runner.py +0 -585
  46. core/smart_app_launcher.py +0 -334
  47. core/smart_tools.py +0 -311
  48. mcp/__init__.py +0 -8
  49. mcp/mcp_server.py +0 -1919
  50. mcp/mcp_server_simple.py +0 -476
  51. mobile_mcp_ai-2.1.2.dist-info/METADATA +0 -567
  52. mobile_mcp_ai-2.1.2.dist-info/RECORD +0 -45
  53. mobile_mcp_ai-2.1.2.dist-info/entry_points.txt +0 -2
  54. mobile_mcp_ai-2.1.2.dist-info/top_level.txt +0 -4
  55. vision/__init__.py +0 -10
  56. vision/vision_locator.py +0 -404
  57. {core → mobile_mcp/core}/__init__.py +0 -0
  58. {core → mobile_mcp/core}/utils/__init__.py +0 -0
  59. {core → mobile_mcp/core}/utils/logger.py +0 -0
  60. {core → mobile_mcp/core}/utils/operation_history_manager.py +0 -0
  61. {utils → mobile_mcp/utils}/__init__.py +0 -0
  62. {utils → mobile_mcp/utils}/logger.py +0 -0
  63. {utils → mobile_mcp/utils}/xml_formatter.py +0 -0
  64. {utils → mobile_mcp/utils}/xml_parser.py +0 -0
  65. {mobile_mcp_ai-2.1.2.dist-info → mobile_mcp_ai-2.5.8.dist-info}/WHEEL +0 -0
@@ -1,1640 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- 移动端SmartLocator适配器 - 复用现有SmartLocator逻辑
5
-
6
- 策略:
7
- 1. Level 1: 规则匹配(免费,85%)
8
- 2. Level 2: 缓存查询(免费,5%)
9
- 3. Level 3: XML深度分析(免费,5%)
10
- 4. Level 4: 视觉识别(付费,4%)
11
- 5. Level 5: 文本AI分析(付费,1%)
12
- """
13
- import hashlib
14
- import time
15
- from typing import Dict, Optional
16
- # 复用现有的SmartLocator(通过导入,不修改原代码)
17
- import sys
18
- from pathlib import Path as PathLib
19
-
20
# Make the sibling browser_mcp package importable.
# mobile_mcp lives in backend/mobile_mcp while browser_mcp lives in
# backend/mind-ui/browser_mcp, so starting from
# backend/mobile_mcp/core/locator/mobile_smart_locator.py we climb four
# directory levels (locator -> core -> mobile_mcp -> backend) and then
# step into 'mind-ui'.
current_file = PathLib(__file__)
mind_ui_path = current_file.parent.parent.parent.parent.joinpath('mind-ui')
if mind_ui_path.exists():
    # Prepend so this checkout wins over any other browser_mcp on sys.path.
    sys.path.insert(0, str(mind_ui_path))

# SmartLocator is optional: when it cannot be imported the flag is False and
# a warning is written to stderr.
try:
    from browser_mcp.core.locator.smart_locator import SmartLocator
except ImportError:
    SMART_LOCATOR_AVAILABLE = False
    print("⚠️ 无法导入SmartLocator,将使用简化版本", file=sys.stderr)
else:
    SMART_LOCATOR_AVAILABLE = True
36
-
37
-
38
- class MobileSmartLocator:
39
- """
40
- 移动端SmartLocator适配器
41
-
42
- 复用现有SmartLocator逻辑,适配移动端格式
43
- """
44
-
45
def __init__(self, mobile_client):
    """
    Build a locator bound to one mobile client.

    Args:
        mobile_client: MobileClient instance; stored as-is and handed to the
            adapter created by ``_create_adapter``.
    """
    self.mobile_client = mobile_client

    # Result cache: key -> entry dict. The TTL is compared against
    # time.time() differences in _try_cache, i.e. 300 seconds = 5 minutes.
    self._cache_ttl = 300
    self._cache: Dict[str, Dict] = {}

    # Per-strategy hit counters, all starting at zero ...
    counter_names = (
        'total',
        'rule_hits',
        'cache_hits',
        'quick_match_hits',
        'xml_analysis',
        'vision_calls',
        'ai_calls',
        'xml_read_count',  # number of XML hierarchy reads
    )
    self.stats = dict.fromkeys(counter_names, 0)
    # ... plus the accumulated elapsed time in milliseconds.
    self.stats['total_time'] = 0.0

    # Detailed performance log entries.
    self.performance_logs = []

    # Reuse the shared SmartLocator when its import succeeded; it talks to
    # the device through the adapter. Otherwise rule matching is disabled.
    self.smart_locator = (
        SmartLocator(self._create_adapter()) if SMART_LOCATOR_AVAILABLE else None
    )
80
-
81
- def _create_adapter(self):
82
- """创建适配器,让SmartLocator可以调用mobile_client的方法"""
83
- class Adapter:
84
- def __init__(self, mobile_client):
85
- self.mobile_client = mobile_client
86
-
87
- async def snapshot(self):
88
- # 返回格式化的字符串,SmartLocator的规则匹配器会调用extract_snapshot_content
89
- # extract_snapshot_content会处理字符串类型
90
- snapshot_str = await self.mobile_client.snapshot()
91
-
92
- # 包装成类似MCP CallToolResult的格式,以便兼容
93
- class SnapshotResult:
94
- def __init__(self, text):
95
- self.content = [type('Content', (), {'text': text})()]
96
-
97
- return SnapshotResult(snapshot_str)
98
-
99
- return Adapter(self.mobile_client)
100
-
101
- async def locate(self, query: str, wait_for_popup: bool = True, max_wait: float = 3.0) -> Optional[Dict]:
102
- """
103
- 智能定位元素
104
-
105
- Args:
106
- query: 自然语言查询
107
- wait_for_popup: 是否等待弹窗出现(默认True,适用于弹窗场景)
108
- max_wait: 最大等待时间(秒,默认3秒)
109
-
110
- Returns:
111
- 定位结果 或 None
112
- """
113
- import time
114
- start_time = time.time()
115
-
116
- self.stats['total'] += 1
117
-
118
- print(f"\n🔍 MobileSmartLocator 定位: {query}", file=sys.stderr)
119
-
120
- # Level 1: 缓存查询(最快)
121
- cache_start = time.time()
122
- cache_result = await self._try_cache(query)
123
- cache_time = (time.time() - cache_start) * 1000
124
-
125
- if cache_result:
126
- self.stats['cache_hits'] += 1
127
- elapsed_time = (time.time() - start_time) * 1000
128
- self.stats['total_time'] += elapsed_time
129
- print(f" ✅ 缓存命中!耗时: {elapsed_time:.2f}ms", file=sys.stderr)
130
- self._log_performance(query, 'cache', elapsed_time, 0)
131
- return cache_result
132
-
133
- # 🎯 弹窗场景:如果启用等待,先等待一段时间让弹窗出现
134
- if wait_for_popup:
135
- import asyncio
136
- await asyncio.sleep(0.5) # 先等待0.5秒,让弹窗有时间出现
137
-
138
- # ⚡ 优化:一次定位只读一次XML(避免重复读取,节省400-1000ms)
139
- print(f" 📱 读取页面XML...", file=sys.stderr)
140
-
141
- # 分步计时:XML读取
142
- xml_read_start = time.time()
143
- xml_string = self.mobile_client.u2.dump_hierarchy()
144
- xml_read_time = (time.time() - xml_read_start) * 1000
145
- print(f" ⏱️ XML读取: {xml_read_time:.2f}ms", file=sys.stderr)
146
-
147
- # 分步计时:XML解析
148
- xml_parse_start = time.time()
149
- elements = self.mobile_client.xml_parser.parse(xml_string)
150
- xml_parse_time = (time.time() - xml_parse_start) * 1000
151
- print(f" ⏱️ XML解析: {xml_parse_time:.2f}ms (共{len(elements)}个元素)", file=sys.stderr)
152
-
153
- xml_time = xml_read_time + xml_parse_time
154
- self.stats['xml_read_count'] += 1
155
- print(f" ✅ XML处理完成,总耗时: {xml_time:.2f}ms (读取: {xml_read_time:.0f}ms + 解析: {xml_parse_time:.0f}ms)", file=sys.stderr)
156
-
157
- # Level 1.5: 快速预匹配(针对容易歧义的查询)
158
- # 例如:"点击 输入邮箱" - 包含"输入"但不是输入操作,而是页签
159
- quick_result = await self._try_quick_match(elements, query)
160
- if quick_result:
161
- self.stats['quick_match_hits'] += 1
162
- elapsed_time = (time.time() - start_time) * 1000
163
- self.stats['total_time'] += elapsed_time
164
- print(f" ✅ 快速预匹配成功!总耗时: {elapsed_time:.2f}ms (XML: {xml_time:.2f}ms)", file=sys.stderr)
165
- await self._cache_result(query, quick_result)
166
- self._log_performance(query, 'quick_match', elapsed_time, 1, xml_time)
167
- return quick_result
168
-
169
- # Level 2: 规则匹配(如果SmartLocator可用)
170
- if self.smart_locator:
171
- rule_result = await self._try_rule_match(elements, query)
172
- if rule_result:
173
- self.stats['rule_hits'] += 1
174
- elapsed_time = (time.time() - start_time) * 1000
175
- self.stats['total_time'] += elapsed_time
176
- print(f" ✅ 规则匹配成功!总耗时: {elapsed_time:.2f}ms (XML: {xml_time:.2f}ms)", file=sys.stderr)
177
- await self._cache_result(query, rule_result)
178
- self._log_performance(query, 'rule_match', elapsed_time, 1, xml_time)
179
- return rule_result
180
-
181
- # Level 3: XML深度分析(免费,快速)
182
- xml_result, candidates = await self._try_xml_analysis(elements, query)
183
- if xml_result:
184
- self.stats['xml_analysis'] += 1
185
- elapsed_time = (time.time() - start_time) * 1000
186
- self.stats['total_time'] += elapsed_time
187
- print(f" ✅ XML分析成功: {xml_result.get('element', '')} 总耗时: {elapsed_time:.2f}ms (XML: {xml_time:.2f}ms)", file=sys.stderr)
188
- await self._cache_result(query, xml_result)
189
- self._log_performance(query, 'xml_analysis', elapsed_time, 1, xml_time)
190
- return xml_result
191
-
192
- # Level 3.5: 位置分析(免费,快速)⭐ 新增
193
- position_result = await self._try_position_analysis(elements, query)
194
- if position_result:
195
- self.stats['position_analysis'] = self.stats.get('position_analysis', 0) + 1
196
- elapsed_time = (time.time() - start_time) * 1000
197
- self.stats['total_time'] += elapsed_time
198
- print(f" ✅ 位置分析成功!总耗时: {elapsed_time:.2f}ms (XML: {xml_time:.2f}ms)", file=sys.stderr)
199
- await self._cache_result(query, position_result)
200
- self._log_performance(query, 'position_analysis', elapsed_time, 1, xml_time)
201
- return position_result
202
-
203
- # 🎯 架构优化:检测弹窗/覆盖层场景
204
- # 如果XML元素很少(<50个),可能是弹窗/覆盖层,优先使用视觉识别
205
- is_popup_scenario = len(elements) < 50 and not candidates
206
-
207
- # Level 3.6: AI智能兜底(分析候选元素)
208
- # 前提:有候选元素(说明XML中有相关元素,只是不确定选哪个)
209
- if candidates:
210
- print(f" 📋 Level 3.6: AI智能兜底 (有{len(candidates)}个候选元素)...", file=sys.stderr)
211
- ai_result = await self._try_ai_candidates(query, candidates, elements)
212
- if ai_result:
213
- self.stats['ai_calls'] += 1
214
- elapsed_time = (time.time() - start_time) * 1000
215
- self.stats['total_time'] += elapsed_time
216
- print(f" ✅ AI智能兜底成功!总耗时: {elapsed_time:.2f}ms (XML: {xml_time:.2f}ms)", file=sys.stderr)
217
- await self._cache_result(query, ai_result)
218
- self._log_performance(query, 'ai_smart_fallback', elapsed_time, 1, xml_time)
219
- return ai_result
220
-
221
- # 🎯 架构优化:弹窗场景优先使用视觉识别
222
- # 如果XML元素很少且没有候选,说明可能是弹窗/覆盖层,视觉识别更有效
223
- if is_popup_scenario:
224
- print(f" 🎯 检测到弹窗场景(XML元素少: {len(elements)}个),优先使用视觉识别...", file=sys.stderr)
225
- vision_result = await self._try_vision(query)
226
- if vision_result:
227
- self.stats['vision_calls'] += 1
228
- elapsed_time = (time.time() - start_time) * 1000
229
- self.stats['total_time'] += elapsed_time
230
- print(f" ✅ 视觉识别成功!总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
231
- await self._cache_result(query, vision_result)
232
- self._log_performance(query, 'vision', elapsed_time, 1, xml_time)
233
- return vision_result
234
-
235
- # Level 4: 文本AI分析(需要AI配置)
236
- # 场景:XML中有元素但无法匹配(需要AI理解语义)
237
- print(f" ⚠️ XML分析失败,尝试AI分析...", file=sys.stderr)
238
- ai_result = await self._try_ai_analysis(query, elements)
239
- if ai_result:
240
- self.stats['ai_calls'] += 1
241
- elapsed_time = (time.time() - start_time) * 1000
242
- self.stats['total_time'] += elapsed_time
243
- print(f" ✅ AI分析成功!总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
244
- await self._cache_result(query, ai_result)
245
- self._log_performance(query, 'ai_analysis', elapsed_time, 1, xml_time) # 传入已解析的elements,不重复读XML
246
- return ai_result
247
-
248
- # Level 5: 视觉识别(最后兜底,多模态)
249
- # 场景:所有方法都失败,视觉识别是最后手段
250
- vision_result = None
251
- if not is_popup_scenario: # 如果之前已经尝试过视觉识别,不再重复
252
- print(f" ⚠️ AI分析也失败,尝试视觉识别(最后兜底)...", file=sys.stderr)
253
- vision_result = await self._try_vision(query)
254
- if vision_result:
255
- self.stats['vision_calls'] += 1
256
- elapsed_time = (time.time() - start_time) * 1000
257
- self.stats['total_time'] += elapsed_time
258
- print(f" ✅ 视觉识别成功!总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
259
- await self._cache_result(query, vision_result)
260
- self._log_performance(query, 'vision', elapsed_time, 1, xml_time)
261
- return vision_result
262
-
263
- # 🎯 最后兜底:使用Cursor AI视觉识别(截图分析)
264
- # 类似@browser的行为:当所有定位方法都失败时,自动截图并请求Cursor AI分析
265
- # ⚠️ 如果查询包含位置信息(如"右上角"),且位置分析已失败,直接返回None,不等待Cursor AI
266
- position_keywords = ['右上角', '左上角', '右下角', '左下角', '顶部', '底部', '左侧', '右侧']
267
- has_position_keyword = any(kw in query for kw in position_keywords)
268
-
269
- if has_position_keyword:
270
- elapsed_time = (time.time() - start_time) * 1000
271
- print(f" ❌ 所有定位方法都失败(包含位置关键词,不使用Cursor AI),总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
272
- return None
273
- try:
274
- from .cursor_vision_helper import CursorVisionHelper
275
- cursor_helper = CursorVisionHelper(self.mobile_client)
276
- # 🎯 直接截图并创建请求文件,不等待(让Cursor AI主动分析)
277
- # 智能选择截图区域
278
- region = cursor_helper._smart_region_selection(query)
279
- screenshot_path = await cursor_helper.take_screenshot(query, region=region)
280
-
281
- # 创建请求文件
282
- import datetime
283
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
284
- request_id = f"{timestamp}_{hash(query) % 10000}"
285
- request_file = cursor_helper.request_dir / f"request_{request_id}.json"
286
-
287
- request_data = {
288
- "request_id": request_id,
289
- "screenshot_path": screenshot_path,
290
- "element_desc": query,
291
- "region": region,
292
- "timestamp": timestamp,
293
- "status": "pending"
294
- }
295
-
296
- with open(request_file, 'w', encoding='utf-8') as f:
297
- import json
298
- json.dump(request_data, f, ensure_ascii=False, indent=2)
299
-
300
- print(f" 📸 已截图并创建分析请求 (request_id: {request_id})", file=sys.stderr)
301
-
302
- # 🎯 返回特殊标记,让MCP服务器知道需要Cursor AI分析
303
- # 返回一个包含请求信息的字典,而不是None
304
- return {
305
- 'element': query,
306
- 'ref': f"cursor_vision_request_{request_id}",
307
- 'confidence': 0,
308
- 'method': 'cursor_vision_pending',
309
- 'screenshot_path': screenshot_path,
310
- 'request_id': request_id,
311
- 'status': 'pending_analysis'
312
- }
313
- except Exception as e:
314
- print(f" ⚠️ Cursor视觉识别失败: {e}", file=sys.stderr)
315
- import traceback
316
- traceback.print_exc()
317
-
318
- elapsed_time = (time.time() - start_time) * 1000
319
- print(f" ❌ 所有定位方法都失败(包括Cursor视觉识别),总耗时: {elapsed_time:.2f}ms", file=sys.stderr)
320
- return None
321
-
322
- async def _try_cache(self, query: str) -> Optional[Dict]:
323
- """尝试从缓存获取"""
324
- cache_key = self._get_cache_key(query)
325
- if cache_key in self._cache:
326
- cached = self._cache[cache_key]
327
- if time.time() - cached['timestamp'] < self._cache_ttl:
328
- return cached['result']
329
- else:
330
- # 缓存过期
331
- del self._cache[cache_key]
332
- return None
333
-
334
- async def _try_quick_match(self, elements: list, query: str) -> Optional[Dict]:
335
- """
336
- 快速预匹配(针对容易歧义的查询)
337
-
338
- 场景:
339
- 1. "输入邮箱" - 包含"输入"但实际是页签,不是输入操作
340
- 2. "输入XXX"但不是"输入框" - 可能是页签/按钮,不是输入操作
341
- 3. "登陆" → "登录" - 同义词替换
342
- 4. "点击XX按钮" → "XX" - 去除无意义词
343
- 5. resource-id直接匹配
344
-
345
- 策略:
346
- - 完全匹配优先(准确性第一)
347
- - 去除无意义词再匹配
348
- - 同义词自动替换
349
- """
350
- import time
351
- start_time = time.time()
352
-
353
- query_lower = query.lower().strip()
354
-
355
- # ⚡ 优化1: 同义词替换
356
- if "登陆" in query_lower:
357
- query_lower = query_lower.replace("登陆", "登录")
358
- print(f" ⚡ 同义词替换: '登陆' → '登录'", file=sys.stderr)
359
-
360
- # ⚡ 优化2: resource-id快速匹配(如果query包含:id/或com.开头)
361
- if ":id/" in query or query.startswith("com."):
362
- print(f" ⚡ 检测到resource-id格式,直接匹配", file=sys.stderr)
363
- for elem in elements:
364
- if elem.get('resource_id') == query:
365
- print(f" ✅ resource-id完全匹配: {query}", file=sys.stderr)
366
- return {
367
- 'element': query,
368
- 'ref': query,
369
- 'confidence': 100,
370
- 'method': 'quick_match_resource_id'
371
- }
372
-
373
- # ⚡ 优化3: 去除无意义词,提取关键词
374
- query_clean = query_lower
375
- removed_words = []
376
- if "点击" in query_clean:
377
- query_clean = query_clean.replace("点击", "").strip()
378
- removed_words.append("点击")
379
- if "按钮" in query_clean and "输入框" not in query_clean:
380
- query_clean = query_clean.replace("按钮", "").strip()
381
- removed_words.append("按钮")
382
-
383
- if removed_words:
384
- print(f" ⚡ 去除无意义词: {', '.join(removed_words)} → '{query_clean}'", file=sys.stderr)
385
-
386
- # 判断是否可能被误判为输入操作
387
- has_input_keyword = "输入" in query_lower
388
- is_not_input_box = "输入框" not in query_lower
389
-
390
- # 如果包含"输入"但不是"输入框",可能是页签/按钮(如"输入邮箱"页签)
391
- # 或者去除了无意义词后,都应该在clickable元素中优先查找
392
- if (has_input_keyword and is_not_input_box) or removed_words:
393
- if has_input_keyword and is_not_input_box:
394
- print(f" ⚡ 快速预匹配: 检测到'输入'但不是'输入框',先查找clickable元素", file=sys.stderr)
395
-
396
- # 在clickable元素中查找
397
- filter_start = time.time()
398
- clickable_elements = [e for e in elements if e.get('clickable', False)]
399
- filter_time = (time.time() - filter_start) * 1000
400
- print(f" ⏱️ 预过滤: {filter_time:.2f}ms (从{len(elements)}个筛选到{len(clickable_elements)}个clickable)", file=sys.stderr)
401
-
402
- # ⚡ 优化4: 完全匹配优先(最重要!)
403
- match_start = time.time()
404
- for elem in clickable_elements:
405
- text = elem.get('text', '').lower()
406
- content_desc = elem.get('content_desc', '').lower()
407
- # 清理content_desc(去除换行符和额外文本)
408
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
409
- content_desc_clean_lower = content_desc_clean.lower()
410
-
411
- # 完全匹配优先(使用清理后的query)
412
- if query_clean == content_desc_clean_lower or query_clean == text:
413
- # 找到完全匹配!
414
- match_time = (time.time() - match_start) * 1000
415
- ref = elem.get('resource_id') or content_desc_clean or text
416
- element_desc = content_desc_clean or text or query
417
-
418
- total_time = (time.time() - start_time) * 1000
419
- print(f" ✅ 完全匹配(清理后): {element_desc}", file=sys.stderr)
420
- print(f" ⏱️ 匹配耗时: {match_time:.2f}ms | 快速预匹配总耗时: {total_time:.2f}ms", file=sys.stderr)
421
-
422
- return {
423
- 'element': element_desc,
424
- 'ref': ref,
425
- 'confidence': 95,
426
- 'method': 'quick_match'
427
- }
428
-
429
- # 原始query也试试完全匹配
430
- if query_lower == content_desc_clean_lower or query_lower == text:
431
- match_time = (time.time() - match_start) * 1000
432
- ref = elem.get('resource_id') or content_desc_clean or text
433
- element_desc = content_desc_clean or text or query
434
-
435
- total_time = (time.time() - start_time) * 1000
436
- print(f" ✅ 完全匹配(原始): {element_desc}", file=sys.stderr)
437
- print(f" ⏱️ 匹配耗时: {match_time:.2f}ms | 快速预匹配总耗时: {total_time:.2f}ms", file=sys.stderr)
438
-
439
- return {
440
- 'element': element_desc,
441
- 'ref': ref,
442
- 'confidence': 95,
443
- 'method': 'quick_match'
444
- }
445
-
446
- # 完全匹配失败,再尝试包含匹配(降级)
447
- match_time = (time.time() - match_start) * 1000
448
- print(f" ⏱️ 完全匹配遍历: {match_time:.2f}ms (未找到)", file=sys.stderr)
449
-
450
- contain_start = time.time()
451
- for elem in clickable_elements:
452
- text = elem.get('text', '').lower()
453
- content_desc = elem.get('content_desc', '').lower()
454
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
455
- content_desc_clean_lower = content_desc_clean.lower()
456
-
457
- # 包含匹配(使用清理后的query)
458
- if query_clean in content_desc_clean_lower or query_clean in text:
459
- contain_time = (time.time() - contain_start) * 1000
460
- ref = elem.get('resource_id') or content_desc_clean or text
461
- element_desc = content_desc_clean or text or query
462
-
463
- total_time = (time.time() - start_time) * 1000
464
- print(f" ✅ 包含匹配: {element_desc}", file=sys.stderr)
465
- print(f" ⏱️ 包含匹配耗时: {contain_time:.2f}ms | 快速预匹配总耗时: {total_time:.2f}ms", file=sys.stderr)
466
-
467
- return {
468
- 'element': element_desc,
469
- 'ref': ref,
470
- 'confidence': 85,
471
- 'method': 'quick_match'
472
- }
473
-
474
- total_time = (time.time() - start_time) * 1000
475
- if total_time > 5: # 只有超过5ms才打印
476
- print(f" ⏱️ 快速预匹配: {total_time:.2f}ms (未匹配)", file=sys.stderr)
477
- return None
478
-
479
- async def _try_rule_match(self, elements: list, query: str) -> Optional[Dict]:
480
- """
481
- 尝试规则匹配(复用SmartLocator)
482
-
483
- Args:
484
- elements: 已解析的元素列表(用于转换结果时复用)
485
- query: 查询文本
486
- """
487
- if not self.smart_locator:
488
- return None
489
-
490
- # ⚡ 同义词替换(规则匹配阶段)
491
- query_processed = query
492
- if "登陆" in query:
493
- query_processed = query.replace("登陆", "登录")
494
- print(f" ⚡ 同义词替换(规则匹配): '登陆' → '登录'", file=sys.stderr)
495
-
496
- # 定义AI函数(用于降级,但这里先不调用)
497
- async def ai_func(client, q: str):
498
- return None # 规则匹配阶段不调用AI
499
-
500
- # 调用SmartLocator,跳过AI
501
- result = await self.smart_locator.locate(query_processed, ai_func=ai_func, skip_ai=True)
502
-
503
- if result:
504
- # 转换结果为移动端格式(传入elements避免重复读取XML)
505
- return self._convert_result(result, query, elements)
506
-
507
- return None
508
-
509
- async def _try_xml_analysis(self, elements: list, query: str):
510
- """
511
- XML深度分析
512
-
513
- Args:
514
- elements: 已解析的元素列表(复用,避免重复读取XML)
515
- query: 查询文本
516
-
517
- Returns:
518
- (result, candidates): result为定位结果,candidates为候选元素列表(用于AI兜底)
519
- """
520
- import time
521
- start_time = time.time()
522
-
523
- print(f" 📋 Level 3: XML深度分析...", file=sys.stderr)
524
-
525
- # 🎯 优化:只在调试模式下打印XML结构预览
526
- # 通过环境变量 MOBILE_MCP_DEBUG=1 开启
527
- import os
528
- debug_mode = os.getenv('MOBILE_MCP_DEBUG', '0') == '1'
529
-
530
- if debug_mode:
531
- # 打印XML结构(调试用)
532
- print(f" 📄 XML结构预览(共{len(elements)}个元素):", file=sys.stderr)
533
- print(f" {'─' * 60}", file=sys.stderr)
534
-
535
- # 只打印前20个有意义的元素(避免输出过多)
536
- meaningful_elements = [
537
- e for e in elements
538
- if e.get('text') or e.get('content_desc') or e.get('resource_id') or e.get('clickable')
539
- ][:20]
540
-
541
- for i, elem in enumerate(meaningful_elements, 1):
542
- text = elem.get('text', '')
543
- desc = elem.get('content_desc', '')
544
- resource_id = elem.get('resource_id', '')
545
- class_name = elem.get('class_name', '')
546
- clickable = elem.get('clickable', False)
547
- focusable = elem.get('focusable', False)
548
-
549
- # 格式化输出
550
- parts = []
551
- if text:
552
- parts.append(f"text='{text[:30]}'")
553
- if desc:
554
- desc_clean = desc.split('\n')[0][:30]
555
- parts.append(f"desc='{desc_clean}'")
556
- if resource_id:
557
- parts.append(f"id='{resource_id[:30]}'")
558
- if class_name:
559
- parts.append(f"class={class_name}")
560
- if clickable:
561
- parts.append("[clickable]")
562
- if focusable:
563
- parts.append("[focusable]")
564
-
565
- print(f" {i:2d}. {' | '.join(parts) if parts else 'empty element'}", file=sys.stderr)
566
-
567
- if len(meaningful_elements) < len([e for e in elements if e.get('text') or e.get('content_desc')]):
568
- print(f" ... (还有更多元素,共{len(elements)}个)", file=sys.stderr)
569
- print(f" {'─' * 60}", file=sys.stderr)
570
-
571
- # 文本匹配
572
- query_lower = query.lower().strip()
573
-
574
- # ⚡ 同义词处理:登陆 -> 登录
575
- if "登陆" in query_lower:
576
- query_lower = query_lower.replace("登陆", "登录")
577
- print(f" ⚡ 同义词替换: '登陆' → '登录'", file=sys.stderr)
578
-
579
- matched = []
580
-
581
- # 提取关键词(去除"输入框"、"按钮"等后缀)
582
- query_keywords = query_lower
583
- if "输入框" in query:
584
- query_keywords = query_lower.replace("输入框", "").strip()
585
- elif "按钮" in query:
586
- query_keywords = query_lower.replace("按钮", "").strip()
587
- elif "页签" in query or "标签" in query:
588
- query_keywords = query_lower.replace("页签", "").replace("标签", "").strip()
589
- elif "图标" in query:
590
- query_keywords = query_lower.replace("图标", "").strip()
591
-
592
- # 判断查询类型:输入框 vs 页签/按钮 vs 图标
593
- is_input_query = "输入框" in query or "输入" in query
594
- is_tab_query = "页签" in query or "标签" in query or ("点击" in query and "输入" not in query)
595
- is_icon_query = "图标" in query or ("搜索" in query and "图标" in query) or ("右上角" in query and "图标" in query)
596
-
597
- # 🚀 性能优化策略(准确性优先 + 速度优化)
598
-
599
- # 步骤1: 根据查询类型预过滤元素(大幅减少遍历范围,提速50%+)
600
- filter_start = time.time()
601
- candidate_elements = []
602
-
603
- if is_input_query and "输入框" in query:
604
- # 查询输入框:只看EditText类型(准确性优先)
605
- candidate_elements = [e for e in elements if e.get('class_name', '').lower() in ['edittext', 'textfield']]
606
- filter_time = (time.time() - filter_start) * 1000
607
- if len(candidate_elements) < len(elements):
608
- print(f" 🎯 输入框查询优化: 从{len(elements)}个元素缩减到{len(candidate_elements)}个EditText (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
609
-
610
- # 特殊处理:如果查询输入框,直接匹配所有EditText(包括空的)
611
- # 这样可以匹配到空输入框,后续通过评分选择最佳
612
- match_start = time.time() # 定义match_start
613
- matched = candidate_elements
614
- match_time = (time.time() - match_start) * 1000
615
- print(f" ✅ 找到 {len(matched)} 个EditText元素(包括空输入框) (⏱️ {match_time:.2f}ms)", file=sys.stderr)
616
-
617
- elif is_icon_query:
618
- # 🎯 图标查询优化:优先从顶部区域筛选
619
- # 1. 先筛选可点击的图标元素(Image/ImageView类型,或者无文本的可点击元素)
620
- icon_elements = []
621
- for e in elements:
622
- if not e.get('clickable', False):
623
- continue
624
-
625
- class_name = e.get('class_name', '').lower()
626
- text = e.get('text', '')
627
- content_desc = e.get('content_desc', '')
628
-
629
- # 图标特征:Image类型,或者无文本的可点击元素(可能是图标)
630
- is_image_type = ('image' in class_name or class_name in ['imageview', 'imagebutton'])
631
- is_icon_like = not text and not content_desc # 无文本描述,可能是图标
632
-
633
- if is_image_type or is_icon_like:
634
- icon_elements.append(e)
635
-
636
- # 2. 如果查询包含"右上角"、"顶部"等位置描述,优先筛选顶部区域元素
637
- if "右上角" in query or "顶部" in query or "上角" in query:
638
- # 解析bounds,筛选Y坐标较小的元素(顶部区域)
639
- screen_height = 2400 # 默认屏幕高度,可以从设备获取
640
- top_threshold = screen_height * 0.3 # 顶部30%区域
641
-
642
- top_icon_elements = []
643
- for elem in icon_elements:
644
- bounds = elem.get('bounds', '')
645
- import re
646
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
647
- if match:
648
- y1 = int(match.group(2))
649
- if y1 < top_threshold:
650
- top_icon_elements.append(elem)
651
-
652
- if top_icon_elements:
653
- candidate_elements = top_icon_elements
654
- else:
655
- candidate_elements = icon_elements
656
- else:
657
- candidate_elements = icon_elements
658
-
659
- filter_time = (time.time() - filter_start) * 1000
660
- if "右上角" in query or "顶部" in query or "上角" in query:
661
- print(f" 🎯 图标查询优化(顶部区域): 从{len(elements)}个元素缩减到{len(candidate_elements)}个顶部图标元素 (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
662
- else:
663
- print(f" 🎯 图标查询优化: 从{len(elements)}个元素缩减到{len(candidate_elements)}个图标元素 (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
664
-
665
- # 步骤2: 遍历候选元素进行文本匹配
666
- match_start = time.time()
667
- matched = []
668
- for element in candidate_elements:
669
- text = element.get('text', '').lower()
670
- content_desc = element.get('content_desc', '').lower()
671
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
672
- content_desc_clean_lower = content_desc_clean.lower()
673
- bounds = element.get('bounds', '')
674
-
675
- # 图标匹配:优先匹配description,也匹配text
676
- text_matched = (query_lower == content_desc_clean_lower or # 完全匹配desc
677
- query_lower == text or # 完全匹配text
678
- query_lower in content_desc_clean_lower or # 包含匹配desc
679
- query_lower in text or # 包含匹配text
680
- query_keywords in content_desc_clean_lower or # 关键词匹配desc
681
- query_keywords in text) # 关键词匹配text
682
-
683
- # 🎯 特殊处理:如果图标没有文本描述,根据位置匹配
684
- if not text_matched and not text and not content_desc:
685
- # 无文本图标,根据位置描述匹配
686
- import re
687
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
688
- if match:
689
- x1, y1, x2, y2 = map(int, match.groups())
690
- center_x = (x1 + x2) // 2
691
- center_y = (y1 + y2) // 2
692
- screen_width = 1080
693
- screen_height = 2400
694
-
695
- # 右上角判断:X坐标在右侧70%以上,Y坐标在顶部30%以内
696
- is_top_right = center_x > screen_width * 0.7 and center_y < screen_height * 0.3
697
- # 顶部判断:Y坐标在顶部30%以内
698
- is_top = center_y < screen_height * 0.3
699
- # 右侧判断:X坐标在右侧70%以上
700
- is_right = center_x > screen_width * 0.7
701
-
702
- # 根据查询中的位置关键词匹配
703
- if ("右上角" in query or "上角" in query) and is_top_right:
704
- matched.append(element)
705
- print(f" ✅ 位置匹配(右上角): bounds={bounds}, center=({center_x}, {center_y})", file=sys.stderr)
706
- elif "顶部" in query and is_top:
707
- matched.append(element)
708
- print(f" ✅ 位置匹配(顶部): bounds={bounds}, center=({center_x}, {center_y})", file=sys.stderr)
709
- elif "右侧" in query or "右边" in query and is_right:
710
- matched.append(element)
711
- print(f" ✅ 位置匹配(右侧): bounds={bounds}, center=({center_x}, {center_y})", file=sys.stderr)
712
-
713
- if text_matched:
714
- matched.append(element)
715
-
716
- elif is_tab_query or ("点击" in query and "输入框" not in query):
717
- # 查询页签/按钮:只看可点击元素
718
- clickable_elements = [e for e in elements if e.get('clickable', False)]
719
- filter_time = (time.time() - filter_start) * 1000
720
- if len(clickable_elements) < len(elements):
721
- candidate_elements = clickable_elements
722
- print(f" 🎯 点击查询优化: 从{len(elements)}个元素缩减到{len(candidate_elements)}个可点击元素 (⏱️ {filter_time:.2f}ms)", file=sys.stderr)
723
- else:
724
- candidate_elements = elements
725
- print(f" ⏱️ 预过滤: {filter_time:.2f}ms (无缩减)", file=sys.stderr)
726
-
727
- # 步骤2: 遍历候选元素进行文本匹配
728
- match_start = time.time()
729
- matched = []
730
- for element in candidate_elements:
731
- text = element.get('text', '').lower()
732
- content_desc = element.get('content_desc', '').lower()
733
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
734
- content_desc_clean_lower = content_desc_clean.lower()
735
-
736
- # 匹配条件(简化判断提高速度)
737
- if (query_lower == content_desc_clean_lower or # 完全匹配desc
738
- query_lower == text or # 完全匹配text
739
- query_lower in content_desc_clean_lower or # 包含匹配desc
740
- query_lower in text or # 包含匹配text
741
- query_keywords in content_desc_clean_lower or # 关键词匹配desc
742
- query_keywords in text): # 关键词匹配text
743
- matched.append(element)
744
- else:
745
- # 其他查询:使用全部元素进行文本匹配
746
- candidate_elements = elements
747
- match_start = time.time() # 定义match_start
748
- matched = []
749
- for element in candidate_elements:
750
- text = element.get('text', '').lower()
751
- content_desc = element.get('content_desc', '').lower()
752
- class_name = element.get('class_name', '').lower()
753
-
754
- # 跳过无意义的容器元素
755
- if class_name in ['framelayout', 'linearlayout', 'relativelayout'] and not text and not content_desc:
756
- continue
757
-
758
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
759
- content_desc_clean_lower = content_desc_clean.lower()
760
-
761
- # 匹配条件
762
- if (query_lower == content_desc_clean_lower or
763
- query_lower == text or
764
- query_lower in content_desc_clean_lower or
765
- query_lower in text or
766
- query_keywords in content_desc_clean_lower or
767
- query_keywords in text):
768
- matched.append(element)
769
-
770
- if matched:
771
- match_time = (time.time() - match_start) * 1000
772
- print(f" ✅ 找到 {len(matched)} 个匹配元素 (⏱️ 文本匹配: {match_time:.2f}ms)", file=sys.stderr)
773
- print(f" {'─' * 60}", file=sys.stderr)
774
-
775
- # 显示所有匹配元素(不限制数量,让用户看到完整情况)
776
- for i, elem in enumerate(matched, 1):
777
- text = elem.get('text', '')
778
- desc = elem.get('content_desc', '')
779
- resource_id = elem.get('resource_id', '')
780
- class_name = elem.get('class_name', '')
781
- clickable = elem.get('clickable', False)
782
- focusable = elem.get('focusable', False)
783
- bounds = elem.get('bounds', '')
784
-
785
- # 计算匹配分数(用于显示)
786
- score = 0
787
- content_desc_clean_lower = desc.split('\n')[0].strip().lower() if desc else ''
788
- text_lower = text.lower()
789
-
790
- if query_lower == content_desc_clean_lower:
791
- score += 100
792
- elif query_lower in content_desc_clean_lower:
793
- score += 50
794
- elif query_keywords in content_desc_clean_lower:
795
- score += 48
796
-
797
- if query_lower == text_lower:
798
- score += 80
799
- elif query_lower in text_lower:
800
- score += 40
801
-
802
- if clickable:
803
- score += 20
804
- if focusable:
805
- score += 5
806
- if resource_id:
807
- score += 5
808
-
809
- # 格式化显示
810
- parts = []
811
- if text:
812
- parts.append(f"text='{text}'")
813
- if desc:
814
- desc_clean = desc.split('\n')[0]
815
- parts.append(f"desc='{desc_clean}'")
816
- if resource_id:
817
- parts.append(f"id='{resource_id}'")
818
- if class_name:
819
- parts.append(f"class={class_name}")
820
- if clickable:
821
- parts.append("[clickable]")
822
- if focusable:
823
- parts.append("[focusable]")
824
- if bounds:
825
- parts.append(f"bounds={bounds}")
826
-
827
- # 计算最终分数(在评分循环中会重新计算,这里只是显示)
828
- print(f" [{i:3d}] 分数={score:3d} | {' | '.join(parts) if parts else 'empty element'}", file=sys.stderr)
829
-
830
- print(f" {'─' * 60}", file=sys.stderr)
831
-
832
- # 🎯 Phase 1优化:位置索引定位(仅针对输入框查询)
833
- # 如果是输入框查询,且所有匹配的元素都是EditText且没有任何标识
834
- # 则使用位置索引(关键词)来区分
835
- if is_input_query and "输入框" in query and matched:
836
- # 检查是否所有匹配元素都是EditText且没有text/content_desc/resource_id
837
- all_empty_edittext = all(
838
- e.get('class_name', '').lower() in ['edittext', 'textfield'] and
839
- not e.get('text') and
840
- not e.get('content_desc') and
841
- not e.get('resource_id')
842
- for e in matched
843
- )
844
-
845
- if all_empty_edittext and len(matched) > 1:
846
- # 所有输入框都没有标识,使用位置索引
847
- print(f" 🎯 检测到{len(matched)}个无标识EditText,使用位置索引定位", file=sys.stderr)
848
-
849
- # 按Y坐标排序
850
- import re
851
- def get_y_coord(elem):
852
- bounds = elem.get('bounds', '')
853
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
854
- return int(match.group(2)) if match else 9999
855
-
856
- sorted_edittexts = sorted(matched, key=get_y_coord)
857
-
858
- # 根据关键词选择
859
- target_elem = None
860
- # 🎯 支持"第一个"、"第二个"、"第三个"等描述
861
- if any(kw in query for kw in ['第一个', '第1个', '1个', '首个']):
862
- target_elem = sorted_edittexts[0] if len(sorted_edittexts) > 0 else None
863
- if target_elem:
864
- print(f" → 关键词'第一个' → 第1个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
865
- elif any(kw in query for kw in ['第二个', '第2个', '2个']):
866
- target_elem = sorted_edittexts[1] if len(sorted_edittexts) > 1 else None
867
- if target_elem:
868
- print(f" → 关键词'第二个' → 第2个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
869
- elif any(kw in query for kw in ['第三个', '第3个', '3个']):
870
- target_elem = sorted_edittexts[2] if len(sorted_edittexts) > 2 else None
871
- if target_elem:
872
- print(f" → 关键词'第三个' → 第3个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
873
- # 原有的关键词匹配
874
- elif any(kw in query for kw in ['邮箱', '账号', '用户名', '手机号', '电话']):
875
- target_elem = sorted_edittexts[0]
876
- print(f" → 关键词'邮箱/账号' → 第1个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
877
- elif '验证码' in query:
878
- target_elem = sorted_edittexts[1] if len(sorted_edittexts) > 1 else sorted_edittexts[0]
879
- print(f" → 关键词'验证码' → 第2个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
880
- elif '密码' in query:
881
- target_elem = sorted_edittexts[1] if len(sorted_edittexts) > 1 else sorted_edittexts[0]
882
- print(f" → 关键词'密码' → 第2个EditText (Y={get_y_coord(target_elem)})", file=sys.stderr)
883
-
884
- if target_elem:
885
- # 直接返回,使用bounds或class_name[index]作为ref
886
- ref = target_elem.get('bounds', '')
887
- if not ref:
888
- index = sorted_edittexts.index(target_elem)
889
- ref = f"EditText[{index}]"
890
-
891
- print(f" 🎯 位置索引定位成功:", file=sys.stderr)
892
- print(f" 元素: {query}", file=sys.stderr)
893
- print(f" ref: '{ref}'", file=sys.stderr)
894
- print(f" 置信度: 90%", file=sys.stderr)
895
-
896
- result = {
897
- 'element': query,
898
- 'ref': ref,
899
- 'confidence': 90,
900
- 'method': 'position_index'
901
- }
902
- return (result, []) # 成功找到,不需要AI兜底
903
-
904
- # 🔍 检测超大容器元素(H5页面的容器)
905
- # 如果是超大容器,使用bounds坐标点击(点击容器底部中心,提交按钮通常在那里)
906
- filtered_matched = []
907
- large_container = None
908
-
909
- for elem in matched:
910
- bounds = elem.get('bounds', '')
911
- if bounds:
912
- import re
913
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
914
- if match:
915
- x1, y1, x2, y2 = int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4))
916
- width = x2 - x1
917
- height = y2 - y1
918
- # 如果宽度超过屏幕宽度的90%,很可能是H5容器元素
919
- if width > 1080 * 0.9: # 假设屏幕宽度1080
920
- print(f" ⚠️ 检测到超大H5容器: width={width}, height={height}", file=sys.stderr)
921
- print(f" bounds={bounds}", file=sys.stderr)
922
- # 保存这个容器,如果没有其他元素,就点击容器底部中心
923
- large_container = elem
924
- continue
925
- filtered_matched.append(elem)
926
-
927
- # 如果过滤后没有元素了,使用超大容器的bounds坐标点击
928
- if not filtered_matched and large_container:
929
- print(f" 🎯 使用H5容器bounds坐标定位(点击底部中心)", file=sys.stderr)
930
- bounds = large_container.get('bounds', '')
931
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
932
- if match:
933
- x1, y1, x2, y2 = int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4))
934
- # 计算底部中心点(Y坐标在容器的95%位置,提交按钮通常在最底部)
935
- center_x = (x1 + x2) // 2
936
- bottom_y = int(y1 + (y2 - y1) * 0.95) # 95%位置(接近底部)
937
-
938
- # 构造一个新的bounds,指向底部中心区域
939
- # 创建一个小的点击区域(50x50像素)
940
- click_bounds = f"[{center_x-25},{bottom_y-25}][{center_x+25},{bottom_y+25}]"
941
-
942
- print(f" 点击位置: ({center_x}, {bottom_y})", file=sys.stderr)
943
- print(f" 点击bounds: {click_bounds}", file=sys.stderr)
944
-
945
- # 直接返回结果,使用bounds作为ref
946
- result = {
947
- 'element': query,
948
- 'ref': click_bounds, # 使用计算后的点击区域
949
- 'confidence': 85,
950
- 'method': 'h5_container_bounds'
951
- }
952
- return (result, [])
953
-
954
- # 如果过滤后没有元素也没有容器,使用原始列表
955
- if not filtered_matched:
956
- print(f" ⚠️ 过滤后无元素,使用原始列表", file=sys.stderr)
957
- filtered_matched = matched
958
- elif len(filtered_matched) < len(matched):
959
- print(f" ✅ 过滤后剩余 {len(filtered_matched)} 个元素(原{len(matched)}个)", file=sys.stderr)
960
-
961
- # 为每个匹配元素计算详细分数
962
- score_start = time.time()
963
- scored_elements = []
964
-
965
- for element in filtered_matched:
966
- score = 0
967
- score_details = [] # 记录加分详情(用于调试)
968
- content_desc = element.get('content_desc', '')
969
- content_desc_lower = content_desc.lower()
970
- # 清理content_desc(去除换行符和额外文本)
971
- content_desc_clean = content_desc.split('\n')[0].strip() if content_desc else ''
972
- content_desc_clean_lower = content_desc_clean.lower()
973
- text = element.get('text', '').lower()
974
- class_name = element.get('class_name', '').lower()
975
-
976
- # 元素类型判断
977
- is_textbox = class_name in ['edittext', 'textfield']
978
- is_button = element.get('clickable', False) and not is_textbox
979
- is_tab = element.get('clickable', False) and ('标签' in content_desc or '标签' in text)
980
-
981
- # ===== 类型匹配加分(最重要) =====
982
- # 如果查询包含"输入框",EditText类型应该获得大幅加分
983
- # 注意:只有当查询明确包含"输入框"时才加分,避免"输入邮箱"页签被误判
984
- if is_input_query and "输入框" in query and is_textbox:
985
- score += 200 # 输入框查询匹配到EditText,大幅加分
986
- score_details.append("类型匹配EditText+200")
987
-
988
- # 额外加分:优先匹配空的输入框(没有text或text是占位符的)
989
- # 检查text是否为空或只是占位符(如"请输入"、"•••"等)
990
- is_empty_or_placeholder = (
991
- not text or
992
- text.strip() == '' or
993
- text.strip() == '•••••••••••••••' or # 密码占位符
994
- '请输入' in text or
995
- '请填写' in text
996
- )
997
-
998
- if is_empty_or_placeholder:
999
- score += 100 # 空输入框大幅优先
1000
- score_details.append("空输入框+100")
1001
- else:
1002
- score -= 50 # 已有文本的输入框大幅降分(避免匹配到已填写的输入框)
1003
- score_details.append(f"已有文本({text})-50")
1004
-
1005
- # 如果查询包含"按钮",可点击的按钮应该获得加分
1006
- if "按钮" in query and is_button:
1007
- score += 150 # 按钮查询匹配到按钮元素
1008
- score_details.append("类型匹配Button+150")
1009
-
1010
- # 如果查询包含"页签"或"标签",页签元素应该获得加分
1011
- if is_tab_query and is_tab:
1012
- score += 150 # 页签查询匹配到页签元素
1013
- score_details.append("类型匹配Tab+150")
1014
-
1015
- # ===== 文本匹配评分 =====
1016
- # 优先匹配清理后的content_desc(完全匹配优先)
1017
- if query_lower == content_desc_clean_lower:
1018
- score += 150 # 完全匹配清理后的description(大幅加分)
1019
- score_details.append("完全匹配desc+150")
1020
- elif query_lower == content_desc_lower:
1021
- score += 140 # 完全匹配原始description(可能包含换行)
1022
- score_details.append("完全匹配原始desc+140")
1023
- elif query_lower in content_desc_clean_lower:
1024
- # 如果元素描述比查询长(如"游戏登录"包含"登录"),大幅降分
1025
- if len(content_desc_clean_lower) > len(query_lower):
1026
- score += 5 # 包含匹配但描述更长,大幅降分(避免匹配到"游戏登录")
1027
- score_details.append(f"包含匹配desc但更长({content_desc_clean_lower}包含{query_lower})+5")
1028
- else:
1029
- score += 30 # 包含匹配清理后的description(降分,避免部分匹配)
1030
- score_details.append("包含匹配desc+30")
1031
- elif query_lower in content_desc_lower:
1032
- # 如果元素描述比查询长,大幅降分
1033
- if len(content_desc_lower) > len(query_lower):
1034
- score += 3 # 包含匹配但描述更长,大幅降分
1035
- score_details.append(f"包含匹配原始desc但更长({content_desc_lower}包含{query_lower})+3")
1036
- else:
1037
- score += 25 # 包含匹配原始description(降分)
1038
- score_details.append("包含匹配原始desc+25")
1039
-
1040
- # 完全匹配text优先于部分匹配(重要)
1041
- if query_lower == text:
1042
- score += 80 # 完全匹配text
1043
- score_details.append("完全匹配text+80")
1044
- elif query_lower in text:
1045
- # 如果元素文本比查询长(如"游戏登录"包含"登录"),大幅降分
1046
- if len(text) > len(query_lower):
1047
- score += 5 # 包含匹配但文本更长,大幅降分(避免匹配到"游戏登录")
1048
- score_details.append(f"包含匹配text但更长({text}包含{query_lower})+5")
1049
- else:
1050
- score += 20 # 包含匹配text(降分,避免部分匹配)
1051
- score_details.append("包含匹配text+20")
1052
- elif text and query_lower in text: # 反向匹配(text包含查询)
1053
- score -= 30 # 如果text包含查询但不是完全匹配,大幅降分(避免匹配到已有文本)
1054
- score_details.append("反向匹配text-30")
1055
-
1056
- # 关键词匹配
1057
- if query_keywords == content_desc_clean_lower:
1058
- score += 95
1059
- score_details.append("关键词完全匹配+95")
1060
- elif query_keywords in content_desc_clean_lower:
1061
- score += 48
1062
- score_details.append("关键词包含匹配+48")
1063
-
1064
- # 文本匹配已在上面处理,这里不需要重复
1065
-
1066
- # ===== 元素属性加分 =====
1067
- # 优先选择可交互的元素
1068
- if element.get('clickable'):
1069
- score += 20 # 可点击元素
1070
- score_details.append("clickable+20")
1071
- if element.get('focusable'):
1072
- score += 10 # 可聚焦元素(输入框通常是focusable)
1073
- score_details.append("focusable+10")
1074
-
1075
- # 优先选择有resource-id的元素
1076
- if element.get('resource_id'):
1077
- score += 5
1078
- score_details.append("resource-id+5")
1079
-
1080
- # 页签特征:可点击+有文本/描述
1081
- if is_tab and (text or content_desc):
1082
- score += 15
1083
- score_details.append("Tab特征+15")
1084
-
1085
- # ===== 位置加分(输入框通常在页面上方,按顺序) =====
1086
- if is_input_query and is_textbox:
1087
- bounds = element.get('bounds', '')
1088
- if bounds:
1089
- # 解析bounds,Y坐标小的在上方
1090
- import re
1091
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
1092
- if match:
1093
- y1 = int(match.group(2))
1094
- # Y坐标越小(越靠上),分数越高(最多+50分)
1095
- # 假设屏幕高度2356,Y坐标在200-800之间是输入框常见位置
1096
- if 200 <= y1 <= 800:
1097
- # 对于"邮箱输入框",优先Y坐标更小的(第一个)
1098
- # 对于"密码输入框",优先Y坐标稍大的(第二个)
1099
- if "邮箱" in query:
1100
- # 邮箱输入框应该在第一个(Y坐标更小)
1101
- position_bonus = max(0, 50 - (y1 - 200) // 10)
1102
- score += position_bonus
1103
- score_details.append(f"位置Y={y1}(邮箱优先)+{position_bonus}")
1104
- elif "密码" in query:
1105
- # 密码输入框应该在第二个(Y坐标稍大)
1106
- # 如果Y坐标在400-700之间,给予加分
1107
- if 400 <= y1 <= 700:
1108
- position_bonus = max(0, 50 - abs(y1 - 550) // 10)
1109
- score += position_bonus
1110
- score_details.append(f"位置Y={y1}(密码优先)+{position_bonus}")
1111
- else:
1112
- score -= 20 # 位置不对,降分
1113
- score_details.append(f"位置Y={y1}(密码位置不对)-20")
1114
- else:
1115
- # 其他输入框,Y坐标越小越好
1116
- position_bonus = max(0, 30 - (y1 - 200) // 20)
1117
- score += position_bonus
1118
- score_details.append(f"位置Y={y1}+{position_bonus}")
1119
-
1120
- # 保存分数和详情
1121
- scored_elements.append((element, score, score_details))
1122
-
1123
- # 按分数排序,选择最佳匹配
1124
- scored_elements.sort(key=lambda x: x[1], reverse=True)
1125
- score_time = (time.time() - score_start) * 1000
1126
-
1127
- # 显示前5个的详细评分
1128
- print(f" 📊 评分详情(前5个) (⏱️ 评分: {score_time:.2f}ms):", file=sys.stderr)
1129
- for i, (elem, score, details) in enumerate(scored_elements[:5], 1):
1130
- text = elem.get('text', '')
1131
- desc = elem.get('content_desc', '')
1132
- class_name = elem.get('class_name', '')
1133
- desc_clean = desc.split('\n')[0] if desc else ''
1134
- print(f" [{i}] 分数={score:3d}: {desc_clean or text or class_name}", file=sys.stderr)
1135
- if details:
1136
- print(f" 详情: {' | '.join(details[:3])}", file=sys.stderr) # 只显示前3个加分项
1137
-
1138
- # 选择最佳匹配
1139
- best = scored_elements[0][0] if scored_elements else None
1140
- best_score = scored_elements[0][1] if scored_elements else 0
1141
-
1142
- # 已经在上面排序了,这里不需要再比较
1143
-
1144
- if best:
1145
- # 确定ref(优先resource-id,其次content_desc,最后text)
1146
- ref = best.get('resource_id')
1147
- if not ref:
1148
- # 如果description匹配,使用清理后的description定位(去除换行符)
1149
- content_desc = best.get('content_desc', '')
1150
- if content_desc:
1151
- # 清理content_desc(去除换行符和额外文本)
1152
- content_desc_clean = content_desc.split('\n')[0].strip()
1153
- content_desc_lower = content_desc.lower()
1154
- content_desc_clean_lower = content_desc_clean.lower()
1155
-
1156
- # 如果查询匹配清理后的description,使用清理后的值
1157
- if query_lower in content_desc_clean_lower or query_keywords in content_desc_clean_lower:
1158
- ref = content_desc_clean # 使用清理后的description
1159
- elif query_lower in content_desc_lower:
1160
- ref = content_desc_clean # 即使匹配原始,也使用清理后的
1161
- else:
1162
- ref = content_desc_clean # 默认使用清理后的
1163
- elif best.get('text'):
1164
- # 使用text定位(页签通常用text)
1165
- ref = best.get('text', '')
1166
- else:
1167
- ref = best.get('content_desc', '')
1168
-
1169
- # 确保ref不为空
1170
- if not ref:
1171
- # 如果还是没有ref,尝试使用bounds或class_name+索引
1172
- bounds = best.get('bounds', '')
1173
- class_name = best.get('class_name', '')
1174
-
1175
- if bounds:
1176
- # 使用bounds作为ref(格式:[x1,y1][x2,y2])
1177
- ref = bounds
1178
- print(f" ⚠️ 使用bounds作为ref: {bounds}", file=sys.stderr)
1179
- elif class_name:
1180
- # 使用class_name+索引(作为最后手段)
1181
- # 查找同类元素的索引
1182
- same_class_elements = [e for e in elements if e.get('class_name') == class_name]
1183
- index = same_class_elements.index(best) if best in same_class_elements else 0
1184
- ref = f"{class_name}[{index}]"
1185
- print(f" ⚠️ 使用class_name+索引作为ref: {ref}", file=sys.stderr)
1186
- else:
1187
- print(f" ⚠️ 找到匹配元素但无法确定ref: {best}", file=sys.stderr)
1188
- # 无法确定ref但有匹配元素,返回候选元素供AI分析
1189
- candidates = matched[:5] if matched else []
1190
- return (None, candidates)
1191
-
1192
- # 返回清理后的element描述
1193
- element_desc = best.get('content_desc', '') or best.get('text', '')
1194
- if element_desc and '\n' in element_desc:
1195
- element_desc = element_desc.split('\n')[0].strip()
1196
-
1197
- # 如果没有描述,使用查询文本或class_name
1198
- if not element_desc:
1199
- if query:
1200
- # 使用查询文本作为描述
1201
- element_desc = query
1202
- else:
1203
- element_desc = best.get('class_name', 'element')
1204
-
1205
- total_time = (time.time() - start_time) * 1000
1206
- print(f" 🎯 选择最佳匹配:", file=sys.stderr)
1207
- print(f" 元素: {element_desc}", file=sys.stderr)
1208
- print(f" ref: '{ref}'", file=sys.stderr)
1209
- print(f" 评分: {best_score}", file=sys.stderr)
1210
- print(f" 置信度: {min(95, 70 + best_score // 2)}%", file=sys.stderr)
1211
- print(f" ⏱️ XML深度分析总耗时: {total_time:.2f}ms", file=sys.stderr)
1212
-
1213
- result = {
1214
- 'element': element_desc,
1215
- 'ref': ref,
1216
- 'confidence': min(95, 70 + best_score // 2),
1217
- 'method': 'xml_analysis'
1218
- }
1219
- return (result, []) # 成功找到,不需要AI兜底
1220
-
1221
- # XML分析失败,但返回候选元素供AI分析
1222
- candidates = matched[:5] if matched else [] # 最多返回5个候选
1223
- return (None, candidates)
1224
-
1225
- async def _try_position_analysis(self, elements: list, query: str) -> Optional[Dict]:
1226
- """
1227
- 位置分析(Level 3.5)⭐ 新增
1228
-
1229
- 通过XML中的bounds信息定位无标识元素(如底部导航栏图标)
1230
-
1231
- 适用场景:
1232
- - "底部导航栏第X个图标"
1233
- - "顶部第X个图标"
1234
- - "右下角的按钮"
1235
-
1236
- Args:
1237
- elements: 已解析的元素列表
1238
- query: 查询文本
1239
-
1240
- Returns:
1241
- 定位结果 或 None
1242
- """
1243
- import time
1244
- start_time = time.time()
1245
-
1246
- # 检测是否是位置查询
1247
- position_keywords = [
1248
- '底部导航', '底部第', '底部图标',
1249
- '顶部导航', '顶部第', '顶部图标',
1250
- '右下角', '左下角', '右上角', '左上角',
1251
- '悬浮按钮', '悬浮', '加号', 'fab',
1252
- '第1个', '第2个', '第3个', '第4个', '第5个',
1253
- '第一个', '第二个', '第三个', '第四个', '第五个',
1254
- '最下面', '最上面', '最左边', '最右边',
1255
- '帖子', '按钮', '图标', # 支持通用的第N个描述
1256
- ]
1257
-
1258
- is_position_query = any(kw in query for kw in position_keywords)
1259
-
1260
- if not is_position_query:
1261
- return None
1262
-
1263
- print(f" 📍 Level 3.5: 位置分析...", file=sys.stderr)
1264
-
1265
- try:
1266
- from .position_analyzer import PositionAnalyzer
1267
-
1268
- # 获取屏幕尺寸(从第一个元素推测,或使用默认值)
1269
- screen_width = 1080
1270
- screen_height = 2400
1271
-
1272
- # 尝试从元素中获取屏幕尺寸
1273
- for elem in elements:
1274
- bounds = elem.get('bounds', '')
1275
- if bounds:
1276
- import re
1277
- match = re.search(r'\[(\d+),(\d+)\]\[(\d+),(\d+)\]', bounds)
1278
- if match:
1279
- x2, y2 = int(match.group(3)), int(match.group(4))
1280
- screen_width = max(screen_width, x2)
1281
- screen_height = max(screen_height, y2)
1282
-
1283
- analyzer = PositionAnalyzer(screen_width, screen_height)
1284
-
1285
- # 根据查询类型选择分析方法(优先级:位置 > 序号)
1286
- result = None
1287
- if '悬浮' in query or '加号' in query or 'fab' in query.lower():
1288
- result = analyzer.analyze_floating_button(elements, query)
1289
- elif '右上角' in query or '上角' in query:
1290
- # 🎯 新增:右上角位置分析
1291
- print(f" 🎯 检测到'右上角'查询,调用 analyze_corner_position", file=sys.stderr)
1292
- result = analyzer.analyze_corner_position(elements, query, corner='top_right')
1293
- elif '左上角' in query:
1294
- result = analyzer.analyze_corner_position(elements, query, corner='top_left')
1295
- elif '右下角' in query:
1296
- result = analyzer.analyze_corner_position(elements, query, corner='bottom_right')
1297
- elif '左下角' in query:
1298
- result = analyzer.analyze_corner_position(elements, query, corner='bottom_left')
1299
- elif ('底部' in query and ('导航' in query or '图标' in query)) or ('底部' in query and any(kw in query for kw in ['第一个', '第二个', '第三个', '第四个', '第五个', '第1个', '第2个', '第3个', '第4个', '第5个'])):
1300
- # 🎯 修复:优先匹配"底部第X个图标"这种描述
1301
- print(f" 🎯 检测到'底部第X个'查询,调用 analyze_bottom_navigation", file=sys.stderr)
1302
- result = analyzer.analyze_bottom_navigation(elements, query)
1303
- elif ('顶部' in query and ('导航' in query or '图标' in query)) or ('顶部' in query and any(kw in query for kw in ['第一个', '第二个', '第三个', '第四个', '第五个', '第1个', '第2个', '第3个', '第4个', '第5个'])):
1304
- # 🎯 修复:优先匹配"顶部第X个图标"这种描述
1305
- print(f" 🎯 检测到'顶部第X个'查询,调用 analyze_top_navigation", file=sys.stderr)
1306
- result = analyzer.analyze_top_navigation(elements, query)
1307
- elif any(kw in query for kw in ['第一个', '第二个', '第三个', '第四个', '第五个', '第1个', '第2个', '第3个', '第4个', '第5个']):
1308
- # 通用的"第N个"定位(没有位置限定)
1309
- print(f" 🎯 检测到'第N个'查询,调用 analyze_nth_element", file=sys.stderr)
1310
- result = analyzer.analyze_nth_element(elements, query)
1311
- else:
1312
- # 其他位置查询(暂不支持)
1313
- print(f" ⚠️ 未匹配到任何位置分析方法", file=sys.stderr)
1314
- result = None
1315
-
1316
- if result:
1317
- elapsed = (time.time() - start_time) * 1000
1318
- print(f" ⏱️ 位置分析耗时: {elapsed:.2f}ms", file=sys.stderr)
1319
- return result
1320
-
1321
- except ImportError:
1322
- print(f" ⚠️ 位置分析器未安装", file=sys.stderr)
1323
- except Exception as e:
1324
- print(f" ⚠️ 位置分析失败: {e}", file=sys.stderr)
1325
-
1326
- return None
1327
-
1328
- async def _try_ai_candidates(self, query: str, candidates: list, all_elements: list) -> Optional[Dict]:
1329
- """
1330
- AI智能兜底 - 分析候选元素
1331
-
1332
- Args:
1333
- query: 用户查询
1334
- candidates: 候选元素列表
1335
- all_elements: 所有元素(用于构建上下文)
1336
- """
1337
- if not candidates:
1338
- return None
1339
-
1340
- try:
1341
- from ..ai.ai_analyzer import ai_analyzer
1342
-
1343
- # 构建上下文信息
1344
- context = f"页面共有{len(all_elements)}个元素,已筛选出{len(candidates)}个候选"
1345
-
1346
- # 调用AI分析
1347
- result = await ai_analyzer.analyze_candidates(query, candidates, context)
1348
- return result
1349
-
1350
- except ImportError:
1351
- print(f" ⚠️ AI分析器未配置", file=sys.stderr)
1352
- return None
1353
- except Exception as e:
1354
- print(f" ⚠️ AI智能兜底失败: {e}", file=sys.stderr)
1355
- return None
1356
-
1357
- async def _try_vision(self, query: str) -> Optional[Dict]:
1358
- """尝试视觉识别(多模态)"""
1359
- print(f" 👁️ Level 4: 尝试视觉识别...", file=sys.stderr)
1360
- try:
1361
- from ...vision.vision_locator import MobileVisionLocator
1362
-
1363
- vision_locator = MobileVisionLocator(self.mobile_client)
1364
- result = await vision_locator.locate_element_by_vision(query)
1365
-
1366
- if result and result.get('found'):
1367
- # 视觉识别返回的是坐标点,直接用于点击
1368
- x = result.get('x', 0)
1369
- y = result.get('y', 0)
1370
- confidence = result.get('confidence', 80)
1371
- print(f" ✅ 视觉识别成功: 坐标({x}, {y}), 置信度{confidence}%", file=sys.stderr)
1372
- return {
1373
- 'element': query,
1374
- 'ref': f"vision_coord_{x}_{y}", # 特殊标记,表示是坐标定位
1375
- 'confidence': confidence,
1376
- 'method': 'vision',
1377
- 'x': x,
1378
- 'y': y,
1379
- }
1380
- else:
1381
- reason = result.get('reason', 'unknown') if result else 'result is None'
1382
- print(f" ❌ 视觉识别未找到元素: {reason}", file=sys.stderr)
1383
- except ImportError:
1384
- print(" ⚠️ 视觉识别模块未安装(需要安装dashscope: pip install dashscope)", file=sys.stderr)
1385
- except Exception as e:
1386
- print(f" ❌ 视觉识别异常: {e}", file=sys.stderr)
1387
- import traceback
1388
- traceback.print_exc()
1389
-
1390
- return None
1391
-
1392
- async def _try_ai_analysis(self, query: str, elements: list = None) -> Optional[Dict]:
1393
- """尝试文本AI分析(最后手段)- 使用AI分析移动端XML结构"""
1394
- print(f" 🤖 Level 4: 尝试AI分析...", file=sys.stderr)
1395
-
1396
- try:
1397
- # 加载根目录的.env配置
1398
- from pathlib import Path
1399
- import os
1400
- from dotenv import load_dotenv
1401
-
1402
- # 查找根目录的.env文件(从mobile_mcp向上查找)
1403
- current_dir = Path(__file__).parent
1404
- root_dir = current_dir.parent.parent.parent # backend/mobile_mcp -> backend -> douzi-ai
1405
- env_file = root_dir / '.env'
1406
-
1407
- if env_file.exists():
1408
- load_dotenv(env_file)
1409
- print(f" ✅ 已加载.env配置: {env_file}", file=sys.stderr)
1410
- else:
1411
- # 尝试从当前目录向上查找
1412
- for parent in current_dir.parents:
1413
- env_file = parent / '.env'
1414
- if env_file.exists():
1415
- load_dotenv(env_file)
1416
- print(f" ✅ 已加载.env配置: {env_file}", file=sys.stderr)
1417
- break
1418
-
1419
- # 获取页面快照(格式化的XML结构)
1420
- snapshot = await self.mobile_client.snapshot()
1421
-
1422
- # 获取AI配置
1423
- try:
1424
- mind_ui_path = PathLib(__file__).parent.parent.parent.parent / 'mind-ui'
1425
- if str(mind_ui_path) not in sys.path:
1426
- sys.path.insert(0, str(mind_ui_path))
1427
-
1428
- from browser_mcp.core.ai.api.api_client import optimize_with_ai_auto
1429
- from browser_mcp.core.ai.config.config import get_ai_config
1430
-
1431
- # 检查AI配置是否可用
1432
- ai_config = get_ai_config()
1433
- if ai_config.default_provider == "manual" or ai_config.is_manual_mode():
1434
- print(f" ⚠️ AI配置为手动模式,跳过AI分析", file=sys.stderr)
1435
- return None
1436
-
1437
- print(f" 🤖 使用AI分析 (Provider: {ai_config.default_provider}, Model: {ai_config.default_model})", file=sys.stderr)
1438
-
1439
- # 创建适配器,让AI可以分析移动端页面
1440
- class MobileAdapter:
1441
- async def snapshot(self):
1442
- class SnapshotResult:
1443
- def __init__(self, text):
1444
- self.content = [type('Content', (), {'text': text})()]
1445
- return SnapshotResult(snapshot)
1446
-
1447
- adapter = MobileAdapter()
1448
-
1449
- # 调用AI分析
1450
- result = await optimize_with_ai_auto(adapter, query)
1451
-
1452
- if result:
1453
- print(f" ✅ AI分析成功: {result.get('element', '')} (置信度: {result.get('confidence', 0)}%)", file=sys.stderr)
1454
- # 转换结果为移动端格式,传入elements避免重复读取XML
1455
- converted = self._convert_result(result, query, elements)
1456
- if converted and converted.get('ref'):
1457
- return converted
1458
- else:
1459
- print(f" ⚠️ AI分析结果转换失败(无法在移动端XML中找到对应元素)", file=sys.stderr)
1460
- return None
1461
- else:
1462
- print(f" ❌ AI分析未找到元素", file=sys.stderr)
1463
- return None
1464
-
1465
- except ImportError as e:
1466
- print(f" ⚠️ 无法导入AI模块: {e}", file=sys.stderr)
1467
- return None
1468
- except Exception as e:
1469
- print(f" ⚠️ AI分析失败: {e}", file=sys.stderr)
1470
- import traceback
1471
- traceback.print_exc()
1472
- return None
1473
-
1474
- except ImportError:
1475
- print(f" ⚠️ 未安装python-dotenv,无法加载.env配置", file=sys.stderr)
1476
- return None
1477
- except Exception as e:
1478
- print(f" ⚠️ AI分析异常: {e}", file=sys.stderr)
1479
- return None
1480
-
1481
- def _convert_result(self, result: Dict, query: str = "", elements: list = None) -> Dict:
1482
- """
1483
- 转换结果为移动端格式
1484
-
1485
- SmartLocator返回的ref可能是:
1486
- 1. CSS选择器(如 "button.login-btn")- 需要重新定位
1487
- 2. resource-id(如 "com.app:id/login")- 直接使用
1488
- 3. text(如 "登录")- 直接使用
1489
- 4. bounds(如 "[100,200][300,400]")- 直接使用
1490
-
1491
- Args:
1492
- result: SmartLocator返回的结果
1493
- query: 查询文本
1494
- elements: 已解析的元素列表(可选,避免重复读取XML)
1495
- """
1496
- ref = result.get('ref', '')
1497
- element = result.get('element', '')
1498
-
1499
- print(f" 🔄 转换AI结果: ref='{ref}', element='{element}', query='{query}'", file=sys.stderr)
1500
-
1501
- # 🎯 优化:检测 XPath 格式(AI 常返回这种格式)
1502
- is_xpath = ref.startswith('//') or ref.startswith('//*[@')
1503
- if is_xpath:
1504
- print(f" ⚠️ 检测到XPath格式,需要重新定位: {ref}", file=sys.stderr)
1505
-
1506
- # 如果ref是CSS选择器、HTML标签或XPath格式,需要重新定位
1507
- # 这种情况下,使用query或element文本重新在XML中查找
1508
- html_tags = ['input', 'button', 'textbox', 'submit', 'textarea', 'select', 'a', 'div', 'span']
1509
- if is_xpath or '.' in ref or '#' in ref or ref.startswith('button') or ref.startswith('textbox') or ref.lower() in html_tags:
1510
- print(f" 🔍 检测到HTML标签/CSS选择器,重新定位...", file=sys.stderr)
1511
- # CSS选择器格式,需要重新定位
1512
- # 使用query或element文本在XML中查找
1513
-
1514
- # ⚡ 优化:如果传入了elements,直接使用;否则才读取XML
1515
- if elements is None:
1516
- xml_string = self.mobile_client.u2.dump_hierarchy()
1517
- elements = self.mobile_client.xml_parser.parse(xml_string)
1518
-
1519
- # 优先使用query,其次使用element
1520
- search_text = (query or element).lower()
1521
-
1522
- # 🔍 只在可点击元素中查找
1523
- clickable_elements = [e for e in elements if e.get('clickable') or e.get('class_name') in ['Button', 'ImageButton', 'EditText']]
1524
- print(f" 🔍 在{len(clickable_elements)}个可点击元素中查找 '{search_text}'", file=sys.stderr)
1525
-
1526
- for elem in clickable_elements:
1527
- elem_text = elem.get('text', '').lower()
1528
- elem_desc = elem.get('content_desc', '').lower()
1529
- elem_resource_id = elem.get('resource_id', '').lower()
1530
-
1531
- # 精确匹配(text或description完全包含查询文本)
1532
- # 🎯 改进:支持模糊匹配(忽略空格、括号等)
1533
- search_text_normalized = search_text.replace(' ', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
1534
- elem_text_normalized = elem_text.replace(' ', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
1535
- elem_desc_normalized = elem_desc.replace(' ', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
1536
-
1537
- if search_text and (
1538
- (elem_text and search_text in elem_text) or
1539
- (elem_desc and search_text in elem_desc) or
1540
- (elem_text_normalized and search_text_normalized in elem_text_normalized) or
1541
- (elem_desc_normalized and search_text_normalized in elem_desc_normalized)
1542
- ):
1543
- # 找到匹配,优先使用text/description(更可靠),其次使用resource-id
1544
- new_ref = elem.get('text') or elem.get('content_desc') or elem.get('resource_id', '')
1545
- if new_ref:
1546
- print(f" ✅ 找到匹配元素: {new_ref}", file=sys.stderr)
1547
- result['ref'] = new_ref
1548
- result['method'] = 'rule_match_converted'
1549
- return result
1550
-
1551
- # 如果找不到,尝试使用element文本(去除"按钮"等后缀)
1552
- if element:
1553
- element_clean = element.replace('按钮', '').replace('输入框', '').strip().lower()
1554
- print(f" 🔍 尝试使用清洗后的element: '{element_clean}'", file=sys.stderr)
1555
- for elem in elements:
1556
- elem_text = elem.get('text', '').lower()
1557
- elem_desc = elem.get('content_desc', '').lower()
1558
- if element_clean in elem_text or elem_text in element_clean or element_clean in elem_desc or elem_desc in element_clean:
1559
- new_ref = elem.get('resource_id') or elem.get('text') or elem.get('content_desc', '')
1560
- if new_ref:
1561
- print(f" ✅ 找到匹配元素: {new_ref}", file=sys.stderr)
1562
- result['ref'] = new_ref
1563
- result['method'] = 'rule_match_converted'
1564
- return result
1565
-
1566
- print(f" ❌ 转换失败,未找到匹配元素", file=sys.stderr)
1567
- return None # 转换失败返回None,而不是返回原result
1568
-
1569
- # 其他格式(resource-id、text、bounds)直接返回
1570
- return result
1571
-
1572
- def _get_cache_key(self, query: str) -> str:
1573
- """生成缓存key"""
1574
- # 使用页面结构hash + 查询文本
1575
- snapshot_hash = hashlib.md5(
1576
- str(self.mobile_client._snapshot_cache or '').encode()
1577
- ).hexdigest()[:8]
1578
-
1579
- query_hash = hashlib.md5(query.encode()).hexdigest()[:8]
1580
-
1581
- return f"{snapshot_hash}_{query_hash}"
1582
-
1583
- async def _cache_result(self, query: str, result: Dict):
1584
- """缓存定位结果"""
1585
- cache_key = self._get_cache_key(query)
1586
- self._cache[cache_key] = {
1587
- 'result': result,
1588
- 'timestamp': time.time()
1589
- }
1590
-
1591
- def _log_performance(self, query: str, method: str, total_time: float, xml_count: int, xml_time: float = 0):
1592
- """
1593
- 记录性能日志
1594
-
1595
- Args:
1596
- query: 查询文本
1597
- method: 匹配方法
1598
- total_time: 总耗时(毫秒)
1599
- xml_count: XML读取次数
1600
- xml_time: XML读取耗时(毫秒)
1601
- """
1602
- self.performance_logs.append({
1603
- 'query': query,
1604
- 'method': method,
1605
- 'total_time': total_time,
1606
- 'xml_count': xml_count,
1607
- 'xml_time': xml_time,
1608
- })
1609
-
1610
def print_performance_report(self):
    """Print the performance report to stderr.

    The report has three parts: overall totals from ``self.stats``, the
    share of each matching method relative to the total number of lookups
    (the divisor is clamped to at least 1), and - when any exist - one table
    row per entry of ``self.performance_logs``.
    """
    err = sys.stderr
    rule = "=" * 80

    print("\n" + rule, file=err)
    print("📊 性能监控报告", file=err)
    print(rule, file=err)

    print(f"\n📈 总体统计:", file=err)
    print(f" 总定位次数: {self.stats['total']}", file=err)
    print(f" 总耗时: {self.stats['total_time']:.2f}ms", file=err)
    average = self.stats['total_time'] / max(1, self.stats['total'])
    print(f" 平均耗时: {average:.2f}ms", file=err)
    print(f" XML总读取次数: {self.stats['xml_read_count']}", file=err)

    print(f"\n🎯 匹配方式分布:", file=err)
    # (label, stats key, line suffix); 'position_analysis' may be absent
    # from the stats dict, so it alone is read with a default of 0.
    breakdown = (
        ("缓存命中", 'cache_hits', ""),
        ("快速预匹配", 'quick_match_hits', ""),
        ("规则匹配", 'rule_hits', ""),
        ("XML深度分析", 'xml_analysis', ""),
        ("位置分析", 'position_analysis', " ⭐"),
        ("视觉识别", 'vision_calls', ""),
        ("AI分析", 'ai_calls', ""),
    )
    for label, key, suffix in breakdown:
        if key == 'position_analysis':
            count = self.stats.get(key, 0)
        else:
            count = self.stats[key]
        share = count / max(1, self.stats['total']) * 100
        print(f" {label}: {count} ({share:.1f}%){suffix}", file=err)

    if self.performance_logs:
        print(f"\n📋 详细性能日志:", file=err)
        columns = (
            ('序号', 6), ('查询', 25), ('方法', 15),
            ('总耗时(ms)', 12), ('XML次数', 10), ('XML耗时(ms)', 12),
        )
        print("".join(title.ljust(width) for title, width in columns), file=err)
        print("-" * 80, file=err)
        for row_no, entry in enumerate(self.performance_logs, 1):
            shown_query = entry['query']
            if len(shown_query) > 22:
                # Keep the query column readable: truncate long queries.
                shown_query = shown_query[:22] + '...'
            print(
                f"{row_no:<6}{shown_query:<25}{entry['method']:<15}"
                f"{entry['total_time']:<12.2f}{entry['xml_count']:<10}{entry['xml_time']:<12.2f}",
                file=err,
            )

    print("\n" + rule, file=err)
1640
-