mobile-mcp-ai 2.2.6__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. mobile_mcp/config.py +3 -2
  2. mobile_mcp/core/basic_tools_lite.py +3193 -0
  3. mobile_mcp/core/ios_client_wda.py +569 -0
  4. mobile_mcp/core/ios_device_manager_wda.py +306 -0
  5. mobile_mcp/core/mobile_client.py +246 -20
  6. mobile_mcp/core/template_matcher.py +429 -0
  7. mobile_mcp/core/templates/close_buttons/auto_x_0112_151217.png +0 -0
  8. mobile_mcp/core/templates/close_buttons/auto_x_0112_152037.png +0 -0
  9. mobile_mcp/core/templates/close_buttons/auto_x_0112_152840.png +0 -0
  10. mobile_mcp/core/templates/close_buttons/auto_x_0112_153256.png +0 -0
  11. mobile_mcp/core/templates/close_buttons/auto_x_0112_154847.png +0 -0
  12. mobile_mcp/core/templates/close_buttons/gray_x_stock_ad.png +0 -0
  13. mobile_mcp/mcp_tools/__init__.py +10 -0
  14. mobile_mcp/mcp_tools/mcp_server.py +992 -0
  15. mobile_mcp_ai-2.5.3.dist-info/METADATA +456 -0
  16. mobile_mcp_ai-2.5.3.dist-info/RECORD +32 -0
  17. mobile_mcp_ai-2.5.3.dist-info/entry_points.txt +2 -0
  18. mobile_mcp/core/ai/__init__.py +0 -11
  19. mobile_mcp/core/ai/ai_analyzer.py +0 -197
  20. mobile_mcp/core/ai/ai_config.py +0 -116
  21. mobile_mcp/core/ai/ai_platform_adapter.py +0 -399
  22. mobile_mcp/core/ai/smart_test_executor.py +0 -520
  23. mobile_mcp/core/ai/test_generator.py +0 -365
  24. mobile_mcp/core/ai/test_generator_from_history.py +0 -391
  25. mobile_mcp/core/ai/test_generator_standalone.py +0 -293
  26. mobile_mcp/core/assertion/__init__.py +0 -9
  27. mobile_mcp/core/assertion/smart_assertion.py +0 -341
  28. mobile_mcp/core/basic_tools.py +0 -945
  29. mobile_mcp/core/h5/__init__.py +0 -10
  30. mobile_mcp/core/h5/h5_handler.py +0 -548
  31. mobile_mcp/core/ios_client.py +0 -219
  32. mobile_mcp/core/ios_device_manager.py +0 -252
  33. mobile_mcp/core/locator/__init__.py +0 -10
  34. mobile_mcp/core/locator/cursor_ai_auto_analyzer.py +0 -119
  35. mobile_mcp/core/locator/cursor_vision_helper.py +0 -414
  36. mobile_mcp/core/locator/mobile_smart_locator.py +0 -1747
  37. mobile_mcp/core/locator/position_analyzer.py +0 -813
  38. mobile_mcp/core/locator/script_updater.py +0 -157
  39. mobile_mcp/core/nl_test_runner.py +0 -585
  40. mobile_mcp/core/smart_app_launcher.py +0 -421
  41. mobile_mcp/core/smart_tools.py +0 -311
  42. mobile_mcp/mcp/__init__.py +0 -13
  43. mobile_mcp/mcp/mcp_server.py +0 -1126
  44. mobile_mcp/mcp/mcp_server_simple.py +0 -23
  45. mobile_mcp/vision/__init__.py +0 -10
  46. mobile_mcp/vision/vision_locator.py +0 -405
  47. mobile_mcp_ai-2.2.6.dist-info/METADATA +0 -503
  48. mobile_mcp_ai-2.2.6.dist-info/RECORD +0 -49
  49. mobile_mcp_ai-2.2.6.dist-info/entry_points.txt +0 -2
  50. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/WHEEL +0 -0
  51. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/licenses/LICENSE +0 -0
  52. {mobile_mcp_ai-2.2.6.dist-info → mobile_mcp_ai-2.5.3.dist-info}/top_level.txt +0 -0
@@ -1,23 +0,0 @@
1
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Mobile MCP Server - simplified edition (backward-compatibility proxy).

This module is now just a shim: it forces "simple" mode and delegates to the
main MCP server. It is kept so that users whose configuration still points at
the simplified server keep working.

New users should run mcp_server.py directly and control the mode through the
MOBILE_MCP_MODE environment variable.
"""

import os
import sys

# Force the simplified tool set before the main server reads its config.
os.environ["MOBILE_MCP_MODE"] = "simple"

# Delegate everything to the main MCP server implementation.
from .mcp_server import main

if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
@@ -1,10 +0,0 @@
1
- """
2
- 移动端视觉识别模块
3
- """
4
-
5
- from .vision_locator import MobileVisionLocator
6
-
7
- __all__ = [
8
- 'MobileVisionLocator',
9
- ]
10
-
@@ -1,405 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- 移动端视觉定位器 - 多模态AI支持
5
-
6
- 功能:
7
- 1. 截图
8
- 2. 图片压缩
9
- 3. 多模态AI分析(通义千问VL / GPT-4V)
10
- 4. 返回元素坐标
11
- """
12
- import base64
13
- import hashlib
14
- import asyncio
15
- from typing import Dict, Optional
16
- import tempfile
17
-
18
# Optional dependency: dashscope (Alibaba Cloud multimodal API client).
DASHSCOPE_AVAILABLE = False
try:
    import dashscope
    from dashscope import MultiModalConversation
except ImportError:
    pass
else:
    DASHSCOPE_AVAILABLE = True

# Optional dependency: Pillow, used for cropping/compressing screenshots.
PIL_AVAILABLE = False
try:
    from PIL import Image
except ImportError:
    pass
else:
    PIL_AVAILABLE = True
30
-
31
-
32
class MobileVisionLocator:
    """Vision-based element locator for mobile UIs.

    Workflow:
      1. take a screenshot (optionally cropped to a smart-selected region),
      2. compress it,
      3. ask a multimodal model (Qwen-VL via dashscope) for the element's
         coordinates *inside the screenshot*,
      4. convert those back to absolute screen coordinates.
    """

    def __init__(self, mobile_client, api_key: Optional[str] = None):
        """Initialize the locator.

        Args:
            mobile_client: MobileClient instance; must expose a ``u2``
                uiautomator2-style handle (``.info``, ``.screenshot``).
            api_key: dashscope API key. When omitted it is read from the
                environment / a ``.env`` file.
        """
        self.mobile_client = mobile_client

        # API configuration.
        self.api_key = api_key or self._get_api_key()
        if self.api_key and DASHSCOPE_AVAILABLE:
            dashscope.api_key = self.api_key

        # Result cache keyed by a hash of the element description only.
        # NOTE(review): the key ignores the current screen content, so a
        # cached hit can be stale after navigation — confirm with callers.
        self._cache: Dict[str, Dict] = {}

        # Usage statistics.
        self.stats = {
            'total_calls': 0,
            'cache_hits': 0,
            'api_calls': 0,
        }

    def _get_api_key(self) -> Optional[str]:
        """Read the API key from the environment, loading ``.env`` if found.

        Returns:
            The key string, or None when no known variable is set.
        """
        import os
        from pathlib import Path
        from dotenv import load_dotenv

        # Look for a .env file three levels above this package
        # (vision -> mobile_mcp -> backend -> project root).
        current_dir = Path(__file__).parent
        root_dir = current_dir.parent.parent.parent
        env_file = root_dir / '.env'

        if env_file.exists():
            load_dotenv(env_file)
            print(f" ✅ 已加载.env文件: {env_file}")

        # Several variable names are accepted for compatibility.
        api_key = (
            os.environ.get('DASHSCOPE_API_KEY') or
            os.environ.get('QWEN_API_KEY') or
            os.environ.get('ALIBABA_CLOUD_API_KEY') or
            os.environ.get('DASHSCOPE_KEY')
        )

        if api_key:
            # Security: never print the key itself to avoid log leakage.
            print(f" ✅ 已读取视觉识别API配置")
        else:
            print(f" ⚠️ 未找到视觉识别API Key,请配置环境变量: DASHSCOPE_API_KEY")

        return api_key

    def _get_vision_model(self) -> str:
        """Return the vision model name (VISION_MODEL env var, default qwen-vl-plus)."""
        import os
        return os.environ.get('VISION_MODEL', 'qwen-vl-plus')

    async def locate_element_by_vision(self, element_description: str, region: Optional[Dict] = None) -> Dict:
        """Locate an element by visual recognition.

        Args:
            element_description: Natural-language description of the element.
            region: Crop box {"x", "y", "width", "height"}; None triggers
                smart region selection (full screen as fallback).

        Returns:
            Result dict; on success contains absolute screen ``x``/``y``.
        """
        self.stats['total_calls'] += 1

        # Serve from cache when the same description was resolved before.
        cache_key = self._get_cache_key(element_description)
        if cache_key in self._cache:
            self.stats['cache_hits'] += 1
            return self._cache[cache_key]

        # Pick a region heuristically when the caller did not specify one.
        if region is None:
            region = self._smart_region_selection(element_description)

        # Screenshot (possibly cropped); offset is needed later to map
        # screenshot-relative coordinates back to the screen.
        screenshot_path, region_offset = await self._take_screenshot(region)
        temp_paths = [screenshot_path]
        try:
            if PIL_AVAILABLE:
                screenshot_path = self._compress_image(screenshot_path)
                if screenshot_path not in temp_paths:
                    temp_paths.append(screenshot_path)

            # Model returns coordinates relative to the (cropped) screenshot.
            result = await self._call_vision_api(screenshot_path, element_description)
        finally:
            # Fix: temporary screenshot files were previously leaked.
            self._cleanup_files(temp_paths)

        # Convert screenshot-relative coordinates to absolute screen ones.
        if result.get('found') and region_offset:
            result['x'] = result.get('x', 0) + region_offset['x']
            result['y'] = result.get('y', 0) + region_offset['y']
            result['region_offset'] = region_offset  # kept for debugging

        self._cache[cache_key] = result
        return result

    @staticmethod
    def _cleanup_files(paths) -> None:
        """Best-effort removal of temporary files; never raises."""
        import os
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                pass

    def _smart_region_selection(self, description: str) -> Optional[Dict]:
        """Heuristically choose a crop region from the description.

        Smaller crops mean smaller uploads and better recognition accuracy:
        "bottom nav bar" -> bottom strip, "title bar" -> top strip,
        "login button" -> middle band; otherwise full screen (None).
        """
        screen_info = self.mobile_client.u2.info
        screen_width = screen_info.get('displayWidth', 1080)
        screen_height = screen_info.get('displayHeight', 2400)

        description_lower = description.lower()

        # Bottom strip (tab bars, bottom buttons): lowest 20%.
        if any(keyword in description_lower for keyword in ['底部', 'bottom', '导航栏', 'tab', '底部导航']):
            return {
                'x': 0,
                'y': int(screen_height * 0.8),
                'width': screen_width,
                'height': int(screen_height * 0.2)
            }

        # Top strip (title/header bars): top 20%.
        if any(keyword in description_lower for keyword in ['顶部', 'top', '标题', 'header', '导航栏']):
            return {
                'x': 0,
                'y': 0,
                'width': screen_width,
                'height': int(screen_height * 0.2)
            }

        # Middle band (login buttons, forms): 30%-70% of the height.
        if any(keyword in description_lower for keyword in ['登录', 'login', '按钮', 'button', '表单', 'form']):
            return {
                'x': 0,
                'y': int(screen_height * 0.3),
                'width': screen_width,
                'height': int(screen_height * 0.4)
            }

        # Default: full screen.
        return None

    async def _take_screenshot(self, region: Optional[Dict] = None) -> tuple:
        """Take a screenshot, optionally cropped to ``region``.

        Args:
            region: Crop box {"x", "y", "width", "height"}; None = full screen.

        Returns:
            (path, offset) where ``offset`` is the crop origin used for
            coordinate conversion. Caller is responsible for deleting the file.
        """
        temp_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
        temp_path = temp_file.name
        temp_file.close()

        screen_info = self.mobile_client.u2.info
        screen_width = screen_info.get('displayWidth', 1080)
        screen_height = screen_info.get('displayHeight', 2400)

        region_offset = {'x': 0, 'y': 0}

        if region:
            # Region capture: grab the full screen first, then crop.
            full_screenshot_path = temp_path.replace('.png', '_full.png')
            self.mobile_client.u2.screenshot(full_screenshot_path)

            if PIL_AVAILABLE:
                with Image.open(full_screenshot_path) as img:
                    x = region.get('x', 0)
                    y = region.get('y', 0)
                    width = region.get('width', screen_width)
                    height = region.get('height', screen_height)

                    # Clamp the crop box to the screen bounds.
                    x = max(0, min(x, screen_width))
                    y = max(0, min(y, screen_height))
                    width = min(width, screen_width - x)
                    height = min(height, screen_height - y)

                    cropped = img.crop((x, y, x + width, y + height))
                    cropped.save(temp_path)

                region_offset = {'x': x, 'y': y}
            else:
                # Without PIL we cannot crop; fall back to the full capture.
                import shutil
                shutil.copy2(full_screenshot_path, temp_path)

            # Fix: the intermediate full-screen capture used to be leaked.
            self._cleanup_files([full_screenshot_path])
        else:
            # Full-screen capture.
            self.mobile_client.u2.screenshot(temp_path)

        return temp_path, region_offset

    def _compress_image(self, image_path: str, max_size: tuple = (1920, 1080), quality: int = 80) -> str:
        """Shrink and re-encode a screenshot for upload.

        Args:
            image_path: Source image path.
            max_size: Maximum (width, height) after thumbnailing.
            quality: JPEG quality (1-100).

        Returns:
            Path to the compressed JPEG, or the original path on failure /
            when PIL is unavailable / when the input is not a PNG.
        """
        if not PIL_AVAILABLE:
            return image_path

        try:
            # Fix: use a context manager so the image handle is closed.
            with Image.open(image_path) as img:
                img.thumbnail(max_size, Image.Resampling.LANCZOS)

                # Re-encode PNG screenshots as JPEG (much smaller payload).
                if image_path.endswith('.png'):
                    jpeg_path = image_path.replace('.png', '_compressed.jpg')
                    img.convert('RGB').save(jpeg_path, 'JPEG', quality=quality)
                    return jpeg_path

            return image_path
        except Exception as e:
            print(f" ⚠️ 图片压缩失败: {e}")
            return image_path

    async def _call_vision_api(self, image_path: str, description: str) -> Dict:
        """Call the multimodal API and parse its JSON answer.

        Returns a dict with at least ``found``; on success also ``x``/``y``
        relative to the supplied screenshot.
        """
        if not DASHSCOPE_AVAILABLE:
            return {
                'found': False,
                'reason': 'dashscope未安装,请运行: pip install dashscope'
            }

        # If no key was found at construction time, retry reading .env now.
        if not self.api_key:
            print(f" ⚠️ 视觉识别API Key未配置,尝试重新读取.env...")
            self.api_key = self._get_api_key()
            if self.api_key:
                dashscope.api_key = self.api_key
                print(f" ✅ 已从.env读取API Key")
            else:
                # Emit debug hints about where .env was expected.
                import os
                from pathlib import Path
                current_dir = Path(__file__).parent
                root_dir = current_dir.parent.parent.parent
                env_file = root_dir / '.env'
                print(f" ⚠️ .env文件路径: {env_file}")
                print(f" ⚠️ .env文件存在: {env_file.exists()}")
                if env_file.exists():
                    print(f" ⚠️ 请检查.env文件中是否有DASHSCOPE_API_KEY")
                return {
                    'found': False,
                    'reason': '未配置API Key,请检查.env文件中的DASHSCOPE_API_KEY'
                }

        self.stats['api_calls'] += 1

        try:
            # Read and base64-encode the screenshot.
            with open(image_path, 'rb') as f:
                image_data = base64.b64encode(f.read()).decode()

            # Fix: declare the real MIME type — _compress_image may have
            # converted the PNG to JPEG.
            mime = 'image/jpeg' if image_path.endswith(('.jpg', '.jpeg')) else 'image/png'

            # Prompt explicitly asks for screenshot-relative coordinates.
            prompt = f"""请在这张移动端App截图中找到以下元素:{description}

重要:请返回元素在截图中的相对坐标(x, y),不是屏幕绝对坐标。
格式为JSON:
{{
    "found": true/false,
    "x": 元素中心X坐标(相对于截图左上角,0-截图宽度),
    "y": 元素中心Y坐标(相对于截图左上角,0-截图高度),
    "confidence": 置信度(0-100),
    "reason": "定位原因"
}}"""

            # Model name is env-configurable.
            vision_model = self._get_vision_model()

            # Fix: get_running_loop() — we are inside a coroutine, and
            # get_event_loop() here is deprecated. The blocking SDK call
            # runs in the default thread pool.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: MultiModalConversation.call(
                    model=vision_model,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"image": f"data:{mime};base64,{image_data}"},
                                {"text": prompt}
                            ]
                        }
                    ]
                )
            )

            if result.status_code == 200:
                # The SDK returns the content either as an object with a
                # ``.text`` attribute or as a plain dict — handle both.
                try:
                    content = result.output.choices[0].message.content[0]
                    if isinstance(content, dict):
                        response_text = content.get('text', '') or str(content)
                    else:
                        response_text = content.text if hasattr(content, 'text') else str(content)

                    # Extract the JSON object carrying the "found" key.
                    import json
                    import re
                    json_match = re.search(r'\{[^{}]*"found"[^{}]*\}', response_text, re.DOTALL)
                    if json_match:
                        return json.loads(json_match.group(0))

                    # Fallback: maybe the whole response is the JSON.
                    # Fix: narrow the former bare ``except:`` clause.
                    try:
                        result_data = json.loads(response_text)
                        if 'found' in result_data:
                            return result_data
                    except (json.JSONDecodeError, ValueError):
                        pass

                    return {
                        'found': False,
                        'reason': f'无法解析AI响应: {response_text[:200]}'
                    }
                except Exception as e:
                    return {
                        'found': False,
                        'reason': f'解析响应失败: {e}, 响应类型: {type(result.output.choices[0].message.content[0])}'
                    }

            return {
                'found': False,
                'reason': f'API调用失败: status_code={result.status_code}, message={getattr(result, "message", "unknown")}'
            }

        except Exception as e:
            return {
                'found': False,
                'reason': f'视觉识别异常: {e}'
            }

    def _get_cache_key(self, description: str) -> str:
        """Return a short, stable cache key for an element description."""
        # md5 is used purely as a non-cryptographic fingerprint here.
        return hashlib.md5(description.encode()).hexdigest()[:16]