mobile-mcp-ai 2.1.2__py3-none-any.whl → 2.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. mobile_mcp/__init__.py +34 -0
  2. mobile_mcp/config.py +142 -0
  3. mobile_mcp/core/basic_tools_lite.py +3266 -0
  4. {core → mobile_mcp/core}/device_manager.py +2 -2
  5. mobile_mcp/core/dynamic_config.py +272 -0
  6. mobile_mcp/core/ios_client_wda.py +569 -0
  7. mobile_mcp/core/ios_device_manager_wda.py +306 -0
  8. {core → mobile_mcp/core}/mobile_client.py +279 -39
  9. mobile_mcp/core/template_matcher.py +429 -0
  10. mobile_mcp/core/templates/close_buttons/auto_x_0112_151217.png +0 -0
  11. mobile_mcp/core/templates/close_buttons/auto_x_0112_152037.png +0 -0
  12. mobile_mcp/core/templates/close_buttons/auto_x_0112_152840.png +0 -0
  13. mobile_mcp/core/templates/close_buttons/auto_x_0112_153256.png +0 -0
  14. mobile_mcp/core/templates/close_buttons/auto_x_0112_154847.png +0 -0
  15. mobile_mcp/core/templates/close_buttons/gray_x_stock_ad.png +0 -0
  16. {core → mobile_mcp/core}/utils/smart_wait.py +3 -3
  17. mobile_mcp/mcp_tools/__init__.py +10 -0
  18. mobile_mcp/mcp_tools/mcp_server.py +1071 -0
  19. mobile_mcp_ai-2.5.8.dist-info/METADATA +469 -0
  20. mobile_mcp_ai-2.5.8.dist-info/RECORD +32 -0
  21. mobile_mcp_ai-2.5.8.dist-info/entry_points.txt +2 -0
  22. mobile_mcp_ai-2.5.8.dist-info/licenses/LICENSE +201 -0
  23. mobile_mcp_ai-2.5.8.dist-info/top_level.txt +1 -0
  24. core/ai/__init__.py +0 -11
  25. core/ai/ai_analyzer.py +0 -197
  26. core/ai/ai_config.py +0 -116
  27. core/ai/ai_platform_adapter.py +0 -399
  28. core/ai/smart_test_executor.py +0 -520
  29. core/ai/test_generator.py +0 -365
  30. core/ai/test_generator_from_history.py +0 -391
  31. core/ai/test_generator_standalone.py +0 -293
  32. core/assertion/__init__.py +0 -9
  33. core/assertion/smart_assertion.py +0 -341
  34. core/basic_tools.py +0 -377
  35. core/h5/__init__.py +0 -10
  36. core/h5/h5_handler.py +0 -548
  37. core/ios_client.py +0 -219
  38. core/ios_device_manager.py +0 -252
  39. core/locator/__init__.py +0 -10
  40. core/locator/cursor_ai_auto_analyzer.py +0 -119
  41. core/locator/cursor_vision_helper.py +0 -414
  42. core/locator/mobile_smart_locator.py +0 -1640
  43. core/locator/position_analyzer.py +0 -813
  44. core/locator/script_updater.py +0 -157
  45. core/nl_test_runner.py +0 -585
  46. core/smart_app_launcher.py +0 -334
  47. core/smart_tools.py +0 -311
  48. mcp/__init__.py +0 -8
  49. mcp/mcp_server.py +0 -1919
  50. mcp/mcp_server_simple.py +0 -476
  51. mobile_mcp_ai-2.1.2.dist-info/METADATA +0 -567
  52. mobile_mcp_ai-2.1.2.dist-info/RECORD +0 -45
  53. mobile_mcp_ai-2.1.2.dist-info/entry_points.txt +0 -2
  54. mobile_mcp_ai-2.1.2.dist-info/top_level.txt +0 -4
  55. vision/__init__.py +0 -10
  56. vision/vision_locator.py +0 -404
  57. {core → mobile_mcp/core}/__init__.py +0 -0
  58. {core → mobile_mcp/core}/utils/__init__.py +0 -0
  59. {core → mobile_mcp/core}/utils/logger.py +0 -0
  60. {core → mobile_mcp/core}/utils/operation_history_manager.py +0 -0
  61. {utils → mobile_mcp/utils}/__init__.py +0 -0
  62. {utils → mobile_mcp/utils}/logger.py +0 -0
  63. {utils → mobile_mcp/utils}/xml_formatter.py +0 -0
  64. {utils → mobile_mcp/utils}/xml_parser.py +0 -0
  65. {mobile_mcp_ai-2.1.2.dist-info → mobile_mcp_ai-2.5.8.dist-info}/WHEEL +0 -0
vision/vision_locator.py DELETED
@@ -1,404 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- 移动端视觉定位器 - 多模态AI支持
5
-
6
- 功能:
7
- 1. 截图
8
- 2. 图片压缩
9
- 3. 多模态AI分析(通义千问VL / GPT-4V)
10
- 4. 返回元素坐标
11
- """
12
- import base64
13
- import hashlib
14
- import asyncio
15
- from typing import Dict, Optional
16
- import tempfile
17
-
18
- try:
19
- import dashscope
20
- from dashscope import MultiModalConversation
21
- DASHSCOPE_AVAILABLE = True
22
- except ImportError:
23
- DASHSCOPE_AVAILABLE = False
24
-
25
- try:
26
- from PIL import Image
27
- PIL_AVAILABLE = True
28
- except ImportError:
29
- PIL_AVAILABLE = False
30
-
31
-
32
class MobileVisionLocator:
    """Locate elements on a mobile screen with a multimodal AI model.

    Workflow of :meth:`locate_element_by_vision`:

    1. take a screenshot (optionally cropped to a region),
    2. compress it,
    3. send it together with a natural-language description to the
       DashScope ``MultiModalConversation`` API,
    4. convert the coordinates returned by the model into absolute
       screen coordinates.

    The class relies on the module-level names ``DASHSCOPE_AVAILABLE``,
    ``PIL_AVAILABLE``, ``dashscope``, ``MultiModalConversation`` and
    ``Image`` that the surrounding file sets up in its guarded imports.
    """

    # Keyword groups used by ``_smart_region_selection``.  The generic word
    # "导航栏" (navigation bar) is kept apart because it can refer to either
    # the top or the bottom bar; it only decides the region when no explicit
    # position keyword is present.
    _BOTTOM_KEYWORDS = ('底部', 'bottom', 'tab')
    _TOP_KEYWORDS = ('顶部', 'top', '标题', 'header')
    _GENERIC_BAR_KEYWORDS = ('导航栏',)
    _MIDDLE_KEYWORDS = ('登录', 'login', '按钮', 'button', '表单', 'form')

    def __init__(self, mobile_client, api_key: Optional[str] = None):
        """Initialise the locator.

        Args:
            mobile_client: MobileClient instance; its ``u2`` attribute is used
                for screen info and screenshots.
            api_key: DashScope / Qwen API key.  When omitted it is read from
                the environment (see :meth:`_get_api_key`).
        """
        self.mobile_client = mobile_client

        # API configuration
        self.api_key = api_key or self._get_api_key()
        if self.api_key and DASHSCOPE_AVAILABLE:
            dashscope.api_key = self.api_key

        # Successful lookups, keyed by ``_get_cache_key``
        self._cache: Dict[str, Dict] = {}

        # Usage statistics
        self.stats = {
            'total_calls': 0,
            'cache_hits': 0,
            'api_calls': 0,
        }

    @staticmethod
    def _env_file_path():
        """Return the path of the project-level ``.env`` file.

        The file is expected three directories above the directory that
        contains this module.
        """
        from pathlib import Path

        return Path(__file__).parent.parent.parent.parent / '.env'

    def _get_api_key(self) -> Optional[str]:
        """Read the API key from the environment, loading ``.env`` if present.

        Returns:
            The first non-empty value among ``DASHSCOPE_API_KEY``,
            ``QWEN_API_KEY``, ``ALIBABA_CLOUD_API_KEY`` and ``DASHSCOPE_KEY``,
            or ``None`` when none of them is set.
        """
        import os

        env_file = self._env_file_path()
        if env_file.exists():
            # python-dotenv is optional: without it we still honour variables
            # that are already present in the process environment instead of
            # failing the whole constructor with ImportError.
            try:
                from dotenv import load_dotenv
            except ImportError:
                print(" ⚠️ 未安装python-dotenv,跳过.env文件加载")
            else:
                load_dotenv(env_file)
                print(f" ✅ 已加载.env文件: {env_file}")

        # Several variable names are accepted for compatibility.
        api_key = (
            os.environ.get('DASHSCOPE_API_KEY') or
            os.environ.get('QWEN_API_KEY') or
            os.environ.get('ALIBABA_CLOUD_API_KEY') or
            os.environ.get('DASHSCOPE_KEY')
        )

        if api_key:
            print(f" ✅ 已读取API Key(长度: {len(api_key)})")
        else:
            print(f" ⚠️ 未找到API Key,检查的环境变量: DASHSCOPE_API_KEY, QWEN_API_KEY, ALIBABA_CLOUD_API_KEY")

        return api_key

    def _get_vision_model(self) -> str:
        """Return the vision model name (``VISION_MODEL`` env var, default ``qwen-vl-plus``)."""
        import os

        return os.environ.get('VISION_MODEL', 'qwen-vl-plus')

    async def locate_element_by_vision(self, element_description: str, region: Optional[Dict] = None) -> Dict:
        """Locate an element by visual recognition.

        Args:
            element_description: Natural-language description of the element.
            region: Screenshot region
                ``{"x": 0, "y": 0, "width": 1080, "height": 2400}``.
                ``None`` lets :meth:`_smart_region_selection` pick one.

        Returns:
            The model's result dict.  When ``found`` is truthy, ``x``/``y``
            are absolute screen coordinates and ``region_offset`` records the
            crop offset that was applied (for debugging).

        Note:
            Only successful results are cached, so a transient failure
            (network error, missing API key) does not stick.  Cached
            coordinates are not invalidated when the screen content changes.
        """
        self.stats['total_calls'] += 1

        # The key includes the caller-supplied region so that the same
        # description looked up in different regions does not collide.
        cache_key = self._get_cache_key(element_description, region)
        cached = self._cache.get(cache_key)
        if cached is not None:
            self.stats['cache_hits'] += 1
            return dict(cached)

        if region is None:
            region = self._smart_region_selection(element_description)

        temp_files = []
        try:
            screenshot_path, region_offset = await self._take_screenshot(region)
            temp_files.append(screenshot_path)
            original_size = self._image_size(screenshot_path)

            upload_path = screenshot_path
            if PIL_AVAILABLE:
                upload_path = self._compress_image(screenshot_path)
                if upload_path != screenshot_path:
                    temp_files.append(upload_path)
            sent_size = self._image_size(upload_path)

            # The model answers in pixel coordinates of the image it received.
            result = await self._call_vision_api(upload_path, element_description)
        finally:
            # Screenshots are only needed for the API call; do not leak them.
            self._remove_files(temp_files)

        if result.get('found'):
            # Step 1: undo the down-scaling done by the compression step.
            result = self._scale_to_original(result, sent_size, original_size)
            # Step 2: crop-relative coordinates -> absolute screen coordinates.
            if region_offset:
                result['x'] = result.get('x', 0) + region_offset['x']
                result['y'] = result.get('y', 0) + region_offset['y']
                result['region_offset'] = region_offset
            self._cache[cache_key] = dict(result)

        return result

    @staticmethod
    def _scale_to_original(result: Dict, sent_size, original_size) -> Dict:
        """Map coordinates from the uploaded image back to the original image.

        Args:
            result: Result dict holding ``x`` / ``y`` relative to the uploaded
                (possibly down-scaled) image.
            sent_size: ``(width, height)`` of the uploaded image, or ``None``.
            original_size: ``(width, height)`` of the screenshot before
                compression, or ``None``.

        Returns:
            ``result`` itself when no scaling is needed or possible, otherwise
            a copy whose numeric ``x`` / ``y`` are scaled and rounded to int.
        """
        if not sent_size or not original_size or tuple(sent_size) == tuple(original_size):
            return result

        sent_w, sent_h = sent_size
        orig_w, orig_h = original_size
        if sent_w <= 0 or sent_h <= 0:
            return result

        scaled = dict(result)
        for axis, ratio in (('x', orig_w / sent_w), ('y', orig_h / sent_h)):
            value = scaled.get(axis)
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                scaled[axis] = int(round(value * ratio))
        return scaled

    @staticmethod
    def _image_size(image_path: str):
        """Return ``(width, height)`` of an image file, or ``None`` if unknown."""
        if not PIL_AVAILABLE:
            return None
        try:
            with Image.open(image_path) as img:
                return img.size
        except OSError:
            return None

    @staticmethod
    def _remove_files(paths) -> None:
        """Delete the given files, ignoring those that cannot be removed."""
        import os

        for path in paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup of temporary files.
                pass

    def _smart_region_selection(self, description: str) -> Optional[Dict]:
        """Guess which part of the screen to capture from the description.

        A smaller screenshot means less data to upload and an easier task for
        the model:

        - explicit bottom keywords ("底部", "bottom", "tab") -> bottom 20%
        - explicit top keywords ("顶部", "top", "标题", "header") -> top 20%
        - a bare "导航栏" (navigation bar) without a position -> bottom 20%
        - login / button / form keywords -> middle 40%
        - anything else -> ``None`` (full screen)

        Explicit position keywords are evaluated before the generic
        "导航栏", so that "顶部导航栏" selects the top of the screen.

        Returns:
            A region dict with ``x``, ``y``, ``width``, ``height`` or ``None``.
        """
        screen_info = self.mobile_client.u2.info
        screen_width = screen_info.get('displayWidth', 1080)
        screen_height = screen_info.get('displayHeight', 2400)

        text = description.lower()

        def mentions(keywords) -> bool:
            return any(keyword in text for keyword in keywords)

        bottom_region = {
            'x': 0,
            'y': int(screen_height * 0.8),
            'width': screen_width,
            'height': int(screen_height * 0.2),
        }
        top_region = {
            'x': 0,
            'y': 0,
            'width': screen_width,
            'height': int(screen_height * 0.2),
        }

        if mentions(self._BOTTOM_KEYWORDS):
            return bottom_region
        if mentions(self._TOP_KEYWORDS):
            return top_region
        if mentions(self._GENERIC_BAR_KEYWORDS):
            return bottom_region
        if mentions(self._MIDDLE_KEYWORDS):
            return {
                'x': 0,
                'y': int(screen_height * 0.3),
                'width': screen_width,
                'height': int(screen_height * 0.4),
            }

        # Default: full screen
        return None

    async def _take_screenshot(self, region: Optional[Dict] = None) -> tuple:
        """Take a screenshot, optionally cropped to ``region``.

        Args:
            region: ``{"x": .., "y": .., "width": .., "height": ..}`` or
                ``None`` for the full screen.

        Returns:
            ``(path, offset)`` where ``path`` is a temporary PNG file the
            caller is responsible for deleting and ``offset`` is the
            ``{'x', 'y'}`` position of the crop inside the full screenshot.
            The offset is zero when no crop was applied.
        """
        import os

        temp_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
        temp_path = temp_file.name
        temp_file.close()

        region_offset = {'x': 0, 'y': 0}
        full_screenshot_path = None
        try:
            if not region or not PIL_AVAILABLE:
                # Full screen requested, or cropping is impossible without PIL.
                self.mobile_client.u2.screenshot(temp_path)
                return temp_path, region_offset

            # Region screenshot: capture the full screen first, then crop.
            full_screenshot_path = os.path.splitext(temp_path)[0] + '_full.png'
            self.mobile_client.u2.screenshot(full_screenshot_path)

            with Image.open(full_screenshot_path) as img:
                # Clamp against the real image size rather than the reported
                # display size, which may differ from the captured bitmap.
                img_width, img_height = img.size
                x = max(0, min(region.get('x', 0), img_width))
                y = max(0, min(region.get('y', 0), img_height))
                width = min(region.get('width', img_width), img_width - x)
                height = min(region.get('height', img_height), img_height - y)

                if width > 0 and height > 0:
                    img.crop((x, y, x + width, y + height)).save(temp_path)
                    region_offset = {'x': x, 'y': y}
                else:
                    # Degenerate region: fall back to the full screenshot.
                    img.save(temp_path)

            return temp_path, region_offset
        except Exception:
            self._remove_files([temp_path])
            raise
        finally:
            if full_screenshot_path:
                self._remove_files([full_screenshot_path])

    def _compress_image(self, image_path: str, max_size: tuple = (1920, 1080), quality: int = 80) -> str:
        """Down-scale an image and re-encode PNG files as JPEG.

        Args:
            image_path: Path of the source image.
            max_size: Bounding box ``(width, height)`` the image must fit in.
            quality: JPEG quality (1-100).

        Returns:
            Path of the compressed JPEG for ``.png`` inputs; otherwise, or on
            any failure, the unchanged ``image_path``.
        """
        if not PIL_AVAILABLE:
            return image_path

        try:
            # ``Image.Resampling`` only exists in newer Pillow releases.
            resample = getattr(Image, 'Resampling', Image).LANCZOS
            with Image.open(image_path) as img:
                img.thumbnail(max_size, resample)

                if image_path.endswith('.png'):
                    # Replace only the suffix, never a ".png" inside the path.
                    jpeg_path = image_path[:-len('.png')] + '_compressed.jpg'
                    img.convert('RGB').save(jpeg_path, 'JPEG', quality=quality)
                    return jpeg_path

            return image_path
        except Exception as e:
            # Compression is an optimisation; fall back to the original file.
            print(f" ⚠️ 图片压缩失败: {e}")
            return image_path

    async def _call_vision_api(self, image_path: str, description: str) -> Dict:
        """Ask the multimodal model where ``description`` is in the image.

        Args:
            image_path: Image file to upload.
            description: Natural-language description of the element.

        Returns:
            The JSON object produced by the model (expected keys: ``found``,
            ``x``, ``y``, ``confidence``, ``reason``) with coordinates
            relative to the uploaded image, or
            ``{'found': False, 'reason': ...}`` on any failure.
        """
        if not DASHSCOPE_AVAILABLE:
            return {
                'found': False,
                'reason': 'dashscope未安装,请运行: pip install dashscope'
            }

        # The key may have been configured after construction: retry once.
        if not self.api_key:
            print(f" ⚠️ 视觉识别API Key未配置,尝试重新读取.env...")
            self.api_key = self._get_api_key()
            if self.api_key:
                dashscope.api_key = self.api_key
                print(f" ✅ 已从.env读取API Key")
            else:
                # Print diagnostics to help locate the configuration problem.
                env_file = self._env_file_path()
                print(f" ⚠️ .env文件路径: {env_file}")
                print(f" ⚠️ .env文件存在: {env_file.exists()}")
                if env_file.exists():
                    print(f" ⚠️ 请检查.env文件中是否有DASHSCOPE_API_KEY")
                return {
                    'found': False,
                    'reason': '未配置API Key,请检查.env文件中的DASHSCOPE_API_KEY'
                }

        self.stats['api_calls'] += 1

        try:
            with open(image_path, 'rb') as f:
                image_data = base64.b64encode(f.read()).decode()

            # Compressed screenshots are JPEG; announce the real media type.
            if image_path.lower().endswith(('.jpg', '.jpeg')):
                mime_type = 'image/jpeg'
            else:
                mime_type = 'image/png'

            # The prompt states explicitly that coordinates are relative to
            # the screenshot, not to the screen.
            prompt = f"""请在这张移动端App截图中找到以下元素:{description}

重要:请返回元素在截图中的相对坐标(x, y),不是屏幕绝对坐标。
格式为JSON:
{{
"found": true/false,
"x": 元素中心X坐标(相对于截图左上角,0-截图宽度),
"y": 元素中心Y坐标(相对于截图左上角,0-截图高度),
"confidence": 置信度(0-100),
"reason": "定位原因"
}}"""

            vision_model = self._get_vision_model()
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"image": f"data:{mime_type};base64,{image_data}"},
                        {"text": prompt}
                    ]
                }
            ]

            # The SDK call is blocking; run it in the default executor so the
            # event loop stays responsive.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: MultiModalConversation.call(
                    model=vision_model,
                    messages=messages
                )
            )

            if response.status_code != 200:
                return {
                    'found': False,
                    'reason': f'API调用失败: status_code={response.status_code}, message={getattr(response, "message", "unknown")}'
                }

            return self._parse_vision_response(response)

        except Exception as e:
            return {
                'found': False,
                'reason': f'视觉识别异常: {e}'
            }

    def _parse_vision_response(self, response) -> Dict:
        """Extract the JSON result object from a successful API response.

        Args:
            response: SDK response whose first choice carries the model text,
                either as an object with a ``text`` attribute, a dict with a
                ``text`` key, or a plain string.

        Returns:
            The parsed result dict, or ``{'found': False, 'reason': ...}``
            when no JSON object containing ``found`` can be extracted.
        """
        import json
        import re

        content = None
        try:
            raw_content = response.output.choices[0].message.content
            # Content is normally a list of parts; tolerate a plain string.
            content = raw_content if isinstance(raw_content, str) else raw_content[0]

            if isinstance(content, dict):
                response_text = content.get('text', '') or str(content)
            else:
                response_text = content.text if hasattr(content, 'text') else str(content)

            # The model may wrap the JSON in prose or a code fence; pick the
            # flat object that mentions "found".
            json_match = re.search(r'\{[^{}]*"found"[^{}]*\}', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))

            # No embedded object found: try the whole response as JSON.
            try:
                result_data = json.loads(response_text)
            except ValueError:
                result_data = None
            if isinstance(result_data, dict) and 'found' in result_data:
                return result_data

            return {
                'found': False,
                'reason': f'无法解析AI响应: {response_text[:200]}'
            }
        except Exception as e:
            return {
                'found': False,
                'reason': f'解析响应失败: {e}, 响应类型: {type(content)}'
            }

    def _get_cache_key(self, description: str, region: Optional[Dict] = None) -> str:
        """Build the cache key for a lookup.

        Args:
            description: Element description.
            region: Caller-supplied region, if any.  Without a region the key
                depends on the description only.

        Returns:
            The first 16 hex characters of an MD5 digest.
        """
        key_source = description
        if region:
            bounds = ','.join(str(region.get(name)) for name in ('x', 'y', 'width', 'height'))
            key_source = f'{description}|{bounds}'
        return hashlib.md5(key_source.encode()).hexdigest()[:16]
404
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes