PyPI - screenforge - Versions diffs - 0.4.0__py3-none-any.whl - Mend

screenforge 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

cli/__init__.py +0 -0
cli/_version.py +1 -0
cli/dispatch.py +266 -0
cli/doctor.py +487 -0
cli/modes/__init__.py +0 -0
cli/modes/action.py +262 -0
cli/modes/default.py +248 -0
cli/modes/demo.py +162 -0
cli/modes/dry_run.py +237 -0
cli/modes/init.py +133 -0
cli/modes/plan.py +148 -0
cli/modes/workflow.py +354 -0
cli/parser.py +305 -0
cli/reporter.py +207 -0
cli/session.py +146 -0
cli/shared.py +427 -0
cli/shorthand.py +90 -0
cli/tool_protocol_handlers.py +446 -0
common/__init__.py +0 -0
common/adapters/__init__.py +21 -0
common/adapters/android_adapter.py +273 -0
common/adapters/base_adapter.py +24 -0
common/adapters/ios_adapter.py +278 -0
common/adapters/web_adapter.py +271 -0
common/ai.py +277 -0
common/ai_autonomous.py +273 -0
common/ai_heal.py +222 -0
common/cache/__init__.py +15 -0
common/cache/cache_hash.py +57 -0
common/cache/cache_manager.py +300 -0
common/cache/cache_stats.py +133 -0
common/cache/cache_storage.py +79 -0
common/cache/embedding_loader.py +150 -0
common/capabilities.py +121 -0
common/case_memory.py +327 -0
common/error_codes.py +61 -0
common/exceptions.py +18 -0
common/executor.py +1504 -0
common/failure_diagnosis.py +138 -0
common/history_manager.py +75 -0
common/logs.py +168 -0
common/mcp_server.py +467 -0
common/preflight.py +496 -0
common/progress.py +37 -0
common/run_reporter.py +415 -0
common/run_resume.py +149 -0
common/runtime_modes.py +35 -0
common/tool_protocol.py +196 -0
common/visual_fallback.py +71 -0
common/workflow_schema.py +150 -0
config/__init__.py +0 -0
config/config.py +167 -0
config/env_loader.py +76 -0
screenforge-0.4.0.dist-info/METADATA +43 -0
screenforge-0.4.0.dist-info/RECORD +64 -0
screenforge-0.4.0.dist-info/WHEEL +5 -0
screenforge-0.4.0.dist-info/entry_points.txt +2 -0
screenforge-0.4.0.dist-info/licenses/LICENSE +21 -0
screenforge-0.4.0.dist-info/top_level.txt +4 -0
utils/__init__.py +0 -0
utils/screenshot_annotator.py +60 -0
utils/utils_ios.py +195 -0
utils/utils_web.py +304 -0
utils/utils_xml.py +218 -0

common/ai_autonomous.py ADDED Viewed

@@ -0,0 +1,273 @@
+import json
+import config.config as config
+from common.ai import AIBrain
+from common.logs import log
+from common.progress import ai_status
+class AutonomousBrain(AIBrain):
+    def get_execution_plan(
+        self,
+        goal: str,
+        context: str,
+        ui_json: str,
+        history: list,
+        platform: str = "android",
+        screenshot_base64: str = None,
+    ) -> dict:
+        try:
+            json.loads(ui_json)
+        except json.JSONDecodeError:
+            ui_json = '{"ui_elements": []}'
+        history_str = "无"
+        if history:
+            history_str = "\n".join(
+                [
+                    f"第{i + 1}步: {step['action_description']}"
+                    for i, step in enumerate(history)
+                ]
+            )
+        system_prompt = f"""
+        你是一个{platform} 自动化测试规划专家。
+        你需要根据用户目标、上下文、历史步骤和当前页面 UI 树，输出一个执行前计划。
+        你必须输出纯 JSON 对象，不要包含 markdown，结构如下：
+        {{
+            "current_state_summary": "当前页面状态摘要",
+            "planned_steps": ["步骤1", "步骤2", "步骤3"],
+            "suggested_assertion": "最终建议断言",
+            "risks": ["风险1", "风险2"]
+        }}
+        """
+        user_prompt = f"""
+        【宏观测试目标】: {goal}
+        【参考上下文(PRD/用例)】: {context if context else '无'}
+        【已执行的历史步骤】:
+        {history_str}
+        【当前屏幕 UI 树】:
+        {ui_json}
+        """
+        user_message_content = [{"type": "text", "text": user_prompt}]
+        if screenshot_base64:
+            user_message_content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{screenshot_base64}"},
+                }
+            )
+        if screenshot_base64:
+            active_client = getattr(self, "vision_client", None) or getattr(
+                self, "text_client", None
+            )
+            active_model = config.VISION_MODEL_NAME
+        else:
+            active_client = getattr(self, "text_client", None) or getattr(
+                self, "client", None
+            )
+            active_model = config.MODEL_NAME
+        if not active_client:
+            log.error("[Error] No model client available for plan generation")
+            return {
+                "current_state_summary": "模型客户端未初始化",
+                "planned_steps": [],
+                "suggested_assertion": "",
+                "risks": ["模型客户端未初始化"],
+            }
+        result_text = ""
+        try:
+            with ai_status("Planning execution steps..."):
+                response = active_client.chat.completions.create(
+                    model=active_model,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_message_content},
+                    ],
+                    temperature=0.1,
+                )
+            result_text = response.choices[0].message.content.strip()
+            if "```json" in result_text:
+                result_text = result_text.split("```json")[1].split("```")[0].strip()
+            elif "```" in result_text:
+                result_text = result_text.replace("```", "").strip()
+            parsed_json = json.loads(result_text)
+            parsed_json.setdefault("current_state_summary", "")
+            parsed_json.setdefault("planned_steps", [])
+            parsed_json.setdefault("suggested_assertion", "")
+            parsed_json.setdefault("risks", [])
+            return parsed_json
+        except Exception as e:
+            log.error(f"[Error] Plan model request or parse failed: {e}\nRaw response: {result_text}")
+            return {
+                "current_state_summary": "计划生成失败",
+                "planned_steps": [],
+                "suggested_assertion": "",
+                "risks": ["计划生成失败"],
+            }
+    def get_next_autonomous_action(
+        self,
+        goal: str,
+        context: str,
+        ui_json: str,
+        history: list,
+        platform: str = "android",
+        last_error: str = "",
+        screenshot_base64: str = None,
+    ) -> dict:
+        """
+        向大模型发送宏观目标、当前状态、前置报错及视觉截图，自主决策下一步动作
+        """
+        try:
+            json.loads(ui_json)
+        except json.JSONDecodeError:
+            ui_json = '{"ui_elements": []}'
+        history_str = "无"
+        if history:
+            history_str = "\n".join(
+                [
+                    f"第{i + 1}步: {step['action_description']}"
+                    for i, step in enumerate(history)
+                ]
+            )
+        error_prompt = ""
+        if last_error:
+            error_prompt = f"\n⚠️ 【特别注意 - 上一步执行失败】:\n{last_error}\n请在本次思考中分析失败原因，尝试换一种动作或定位器。\n"
+        vision_prompt = ""
+        if screenshot_base64:
+            vision_prompt = "\n👁️ 【视觉辅助】: 你同时收到了一张真实屏幕截图。请结合视觉画面与 UI 树，更精准地理解页面布局、按钮状态。如果 XML 树混乱，请以视觉画面为准。"
+        system_prompt = f"""
+        你是一个完全自主的{platform} {'多模态视觉' if screenshot_base64 else '纯文本'} 高级自动化测试 Agent。
+        你需要根据用户的【宏观测试目标】、【参考上下文】、【已执行的历史步骤】以及【当前屏幕 UI 树】{'和【屏幕截图】' if screenshot_base64 else ''}，自主决定下一步需要执行什么动作。
+        {vision_prompt}
+        允许的 action 类型:
+        - "goto": 导航到指定 URL (仅 Web 端)。必须在 extra_value 填入目标 URL (如 "https://www.google.com")。此时 locator_type 填 "global"，locator_value 填 "global"。
+        - "click": 点击元素
+        - "long_click": 长按元素
+        - "hover": 悬停元素 (针对 Web 端，触发下拉菜单等交互)
+        - "input": 在输入框中输入内容 (需通过 extra_value 参数提供内容)
+        - "swipe": 滑动屏幕寻找不在视口内的元素。必须在 extra_value 填入 "up", "down", "left" 或 "right"。此时 locator_type 填 "global"。
+        - "press": 模拟键盘或物理系统按键。必须在 extra_value 填入按键名 (如 "Enter", "Back")。此时 locator_type 填 "global"。
+        - "scroll_into_view": (仅 Web) 将元素滚动到视口内 (元素级，优于盲目 swipe)。
+        - "select": (仅 Web) 原生 <select> 下拉框选择。extra_value 填选项文本或 value。
+        - "upload": (仅 Web) 文件 <input> 上传。extra_value 填文件路径。
+        - "double_click" / "right_click": (仅 Web) 双击 / 右键点击元素。
+        - "drag": (仅 Web) 拖拽。locator 定位源，extra_value 填目标 (css 或文本)。
+        - "wait_for": 显式等待元素出现或消失 (替代死等)。extra_value 填 "visible"(默认) 或 "hidden"。
+        - "assert_exist": 校验某个元素是否在页面上出现
+        - "assert_not_exist": 校验某个元素已消失/不存在 (如加载动画消失、弹窗关闭)
+        - "assert_text_equals": 校验某个元素的文本是否与期望值【完全相等】
+        - "assert_text_contains": 校验某个元素的文本【包含】指定子串 (动态文本首选)。extra_value 填子串。
+        - "assert_value": 校验输入框/表单字段的当前值。extra_value 填期望值。
+        - "assert_url": (仅 Web) 校验当前页面 URL 包含子串。locator_type/value 填 "global"，extra_value 填 URL 子串。
+        定位器 (locator_type) 优先级: css > resourceId > text > description
+        🚨 警告: 若 resourceId 是动态随机的，必须降级使用 text 或 description！
+        💡 多个元素 text 相同时 (如每行一个 "Delete")，选带 `scope` 字段的那个 —— scope 是
+           它所在行/区块的标识文本 (如 "Bob Jones")，引擎会据此生成稳定的作用域定位器，
+           精确命中正确的那一行 (而非永远点第一行)。
+        【思考与状态决策】
+        你需要先思考 (thought)，然后评估当前状态 (status)：
+        - "running": 目标尚未完成，需要执行下一步动作。
+        - "success": 目标已达到最终校验阶段。⚠️ 强烈要求：宣告成功时，你必须在 result 中提供一个断言动作 (`assert_exist` / `assert_text_contains` / `assert_text_equals` / `assert_value` / `assert_url` 之一)，底层引擎会执行该断言并固化到测试脚本中。优先选择最能证明"目标已达成"的断言 (如登录成功后断言 URL 含 "/dashboard"，或断言欢迎文本包含用户名)。
+        - "failed": 遇到了无法克服的阻塞性错误，无法继续。
+        【强制输出格式】
+        必须输出纯 JSON 对象，不要包含任何 markdown 代码块标记，结构严格如下:
+        {{
+            "thought": "我现在的思考过程，我看到了什么，我接下来要干什么",
+            "status": "running" | "success" | "failed",
+            "result": {{"action": "...", "locator_type": "...", "locator_value": "...", "extra_value": "..."}}
+        }}
+        """
+        user_prompt = f"""
+        【宏观测试目标】: {goal}
+        【参考上下文(PRD/用例)】: {context if context else '无'}
+        【已执行的历史步骤】:
+        {history_str}
+        {error_prompt}
+        【当前屏幕 UI 树】:
+        {ui_json}
+        """
+        user_message_content = [{"type": "text", "text": user_prompt}]
+        if screenshot_base64:
+            user_message_content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{screenshot_base64}"},
+                }
+            )
+        if screenshot_base64:
+            active_client = getattr(self, "vision_client", None) or getattr(
+                self, "text_client", None
+            )  # 兼容配置
+            active_model = config.VISION_MODEL_NAME
+        else:
+            active_client = getattr(self, "text_client", None) or getattr(
+                self, "client", None
+            )
+            active_model = config.MODEL_NAME
+        if not active_client:
+            log.error("[Error] No model client available for autonomous decision")
+            return {
+                "status": "failed",
+                "thought": "模型客户端未初始化",
+                "result": {},
+            }
+        log.info(f"[Autonomous] Reasoning with model [{active_model}]...")
+        result_text = ""
+        try:
+            with ai_status("Reasoning about next action..."):
+                response = active_client.chat.completions.create(
+                    model=active_model,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_message_content},
+                    ],
+                    temperature=0.1,
+                )
+            result_text = response.choices[0].message.content.strip()
+            if "```json" in result_text:
+                result_text = result_text.split("```json")[1].split("```")[0].strip()
+            elif "```" in result_text:
+                result_text = result_text.replace("```", "").strip()
+            parsed_json = json.loads(result_text)
+            thought = parsed_json.get("thought", "无")
+            status = parsed_json.get("status", "failed")
+            log.info(f"[Agent] Thought: {thought}")
+            log.info(f"[Agent] Status: {status}")
+            return parsed_json
+        except Exception as e:
+            log.error(f"[Error] Autonomous model request or parse failed: {e}\nRaw response: {result_text}")
+            return {
+                "status": "failed",
+                "thought": "模型返回格式异常或请求失败",
+                "result": {},
+            }

common/ai_heal.py ADDED Viewed

@@ -0,0 +1,222 @@
+"""AI Self-Healing Engine — structured JSON output with confidence scoring."""
+import ast
+import json
+import re
+import time
+from dataclasses import dataclass
+from common.logs import log
+@dataclass
+class HealResult:
+    """Self-heal attempt result."""
+    confidence: float  # 0.0 - 1.0
+    fix_description: str
+    fixed_code: str
+    @property
+    def is_valid_syntax(self) -> bool:
+        try:
+            ast.parse(self.fixed_code)
+            return True
+        except SyntaxError:
+            return False
+def _parse_heal_response(raw: str) -> HealResult:
+    """Parse LLM response into HealResult. Tries multiple extraction strategies."""
+    # Strategy 1: direct JSON parse
+    try:
+        data = json.loads(raw)
+        return HealResult(
+            confidence=float(data.get("confidence", 0.0)),
+            fix_description=str(data.get("fix_description", "")),
+            fixed_code=str(data.get("fixed_code", "")),
+        )
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+    # Strategy 2: extract JSON from markdown code fence or surrounding text
+    # First try ```json ... ``` blocks
+    json_fence = re.search(r"```(?:json)?\s*\n(\{[\s\S]*?\})\s*\n```", raw)
+    if json_fence:
+        try:
+            data = json.loads(json_fence.group(1))
+            if "fixed_code" in data:
+                return HealResult(
+                    confidence=float(data.get("confidence", 0.0)),
+                    fix_description=str(data.get("fix_description", "")),
+                    fixed_code=str(data.get("fixed_code", "")),
+                )
+        except (json.JSONDecodeError, TypeError, ValueError):
+            pass
+    # Then try balanced-brace extraction from raw text
+    for m in re.finditer(r"\{", raw):
+        start = m.start()
+        depth = 0
+        in_str = False
+        escape = False
+        for i in range(start, len(raw)):
+            c = raw[i]
+            if escape:
+                escape = False
+                continue
+            if c == "\\":
+                escape = True
+                continue
+            if c == '"' and not escape:
+                in_str = not in_str
+                continue
+            if in_str:
+                continue
+            if c == "{":
+                depth += 1
+            elif c == "}":
+                depth -= 1
+                if depth == 0:
+                    candidate = raw[start : i + 1]
+                    try:
+                        data = json.loads(candidate)
+                        if "fixed_code" in data:
+                            return HealResult(
+                                confidence=float(data.get("confidence", 0.0)),
+                                fix_description=str(data.get("fix_description", "")),
+                                fixed_code=str(data.get("fixed_code", "")),
+                            )
+                    except (json.JSONDecodeError, TypeError, ValueError):
+                        pass
+                    break
+    # Strategy 3: fallback — extract python code block (legacy format), low confidence
+    code_match = re.search(r"```python\n(.*?)\n```", raw, re.DOTALL)
+    if code_match:
+        return HealResult(
+            confidence=0.3,
+            fix_description="(fallback: extracted from markdown code block)",
+            fixed_code=code_match.group(1).strip(),
+        )
+    # Strategy 4: last resort — strip backticks
+    stripped = raw.replace("```python", "").replace("```", "").strip()
+    if "def test_" in stripped:
+        return HealResult(
+            confidence=0.2,
+            fix_description="(fallback: raw text extraction)",
+            fixed_code=stripped,
+        )
+    return HealResult(confidence=0.0, fix_description="failed to parse response", fixed_code="")
+class HealerBrain:
+    """AI Self-Healing Engine."""
+    def __init__(self):
+        from openai import OpenAI
+        import config.config as config
+        self.client = OpenAI(
+            api_key=config.VISION_API_KEY, base_url=config.VISION_BASE_URL
+        )
+        self.model_name = config.VISION_MODEL_NAME
+    def heal_script(
+        self,
+        script_content: str,
+        error_msg: str,
+        error_line_num: int,
+        ui_json: str,
+        screenshot_base64: str,
+        platform: str,
+    ) -> HealResult:
+        """Analyze failure and generate fix. Returns HealResult (never None)."""
+        log.info(
+            f"🧠 [HealerBrain] Analyzing {platform} failure at line {error_line_num}..."
+        )
+        start_time = time.time()
+        system_prompt = """你是一个自动化测试自愈引擎。当测试用例执行失败时，你负责分析原因并生成修复代码。
+【输入】
+1. 报错行号和异常堆栈
+2. 案发瞬间的 UI 元素树 (JSON) 和截图
+3. 原始测试脚本
+【思考步骤】
+1. 分析报错：元素找不到？Strict Mode 多元素冲突？弹窗遮挡？
+2. 观察 UI 树和截图，找到目标元素当前的实际状态
+3. 在保证业务流完整性的前提下，修改失败的定位器代码
+【输出格式 — 必须返回 JSON】
+{
+  "confidence": 0.0到1.0的浮点数,
+  "fix_description": "简短描述修复了什么",
+  "fixed_code": "完整的修复后 Python 脚本代码"
+}
+confidence 含义：
+- 0.9-1.0: 明确找到了元素变化，修复方案确定
+- 0.7-0.8: 找到了可能的匹配，修复方案较有把握
+- 0.5-0.6: 不太确定，但做了最佳猜测
+- <0.5: 不确定修复是否正确
+注意：fixed_code 中的换行用 \\n 表示，确保 JSON 可解析。只返回 JSON，不要其他文字。"""
+        user_prompt = f"""【报错平台】: {platform}
+【报错行号】: 第 {error_line_num} 行
+【异常信息】: {error_msg}
+【UI 树】:
+{ui_json}
+【原始脚本】:
+{script_content}
+请返回 JSON 格式的修复方案。"""
+        messages = [{"role": "system", "content": system_prompt}]
+        user_content = [{"type": "text", "text": user_prompt}]
+        if screenshot_base64:
+            user_content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{screenshot_base64}"},
+                }
+            )
+        messages.append({"role": "user", "content": user_content})
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=messages,
+                temperature=0.1,
+            )
+            raw = response.choices[0].message.content.strip()
+            result = _parse_heal_response(raw)
+            # Syntax validation — invalid syntax forces confidence to 0
+            if result.fixed_code and not result.is_valid_syntax:
+                log.warning("⚠️ [HealerBrain] Generated code has syntax errors, rejecting")
+                result = HealResult(
+                    confidence=0.0,
+                    fix_description=f"syntax error in generated code: {result.fix_description}",
+                    fixed_code="",
+                )
+            latency = time.time() - start_time
+            log.info(
+                f"⏱️ [HealerBrain] Done in {latency:.2f}s "
+                f"(confidence={result.confidence:.2f}, desc={result.fix_description[:80]})"
+            )
+            return result
+        except Exception as e:
+            log.error(f"❌ [HealerBrain] API call failed: {e}")
+            return HealResult(confidence=0.0, fix_description=f"API error: {e}", fixed_code="")

common/cache/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+from .cache_hash import compute_instruction_hash, compute_ui_hash
+from .cache_manager import CacheManager
+from .cache_stats import CacheStats
+from .cache_storage import cleanup_expired_entries, get_cache_filename, load_cache, save_cache
+# Names exported by `from <this package> import *`; each one is imported
+# above from the cache_hash, cache_manager, cache_stats or cache_storage submodule.
+__all__ = [
+    "compute_ui_hash",
+    "compute_instruction_hash",
+    "get_cache_filename",
+    "load_cache",
+    "save_cache",
+    "cleanup_expired_entries",
+    "CacheStats",
+    "CacheManager"
+]

common/cache/cache_hash.py ADDED Viewed

@@ -0,0 +1,57 @@
+import hashlib
+import json
+import re
+from typing import Any, Dict
+def _extract_semantic_fingerprint(ui_json: Dict[str, Any]) -> list:
+    """
+    提取页面锚点指纹，免疫动态数据和渲染顺序波动。
+    """
+    fingerprint_features = set()
+    elements = ui_json.get("ui_elements", [])
+    for el in elements:
+        raw_text = el.get("text", "") or el.get("desc", "")
+        # 抹除所有数字、字母、符号，只保留纯汉字
+        cn_text = re.sub(r"[^\u4e00-\u9fa5]", "", raw_text)
+        # 标准的 UI 导航或按钮，通常在 2 到 6 个汉字之间
+        if 2 <= len(cn_text) <= 6:
+            # 动态黑名单
+            if cn_text in [
+                "加密货币",
+                "比特币",
+                "meme币",
+                "美股代币",
+                "贵金属代币",
+                "热门资产",
+                "已上线",
+                "已上架",
+                "上架",
+                "下架",
+                "公告",
+                "活动",
+            ]:
+                continue
+            fingerprint_features.add(f"{el.get('class')}|{cn_text}")
+    # 强制排序并转为列表，保证哈希的绝对一致性
+    return sorted(list(fingerprint_features))
+def compute_ui_hash(ui_json: Dict[str, Any]) -> str:
+    """计算用于混合缓存匹配的页面骨架 Hash"""
+    fingerprint = _extract_semantic_fingerprint(ui_json)
+    fingerprint_str = json.dumps(fingerprint)
+    hash_obj = hashlib.sha256()
+    hash_obj.update(fingerprint_str.encode("utf-8"))
+    return hash_obj.hexdigest()
+def compute_instruction_hash(instruction: str) -> str:
+    """计算用于混合缓存 O(1) 精确匹配的指令 Hash"""
+    normalized_inst = re.sub(r"\s+", " ", instruction).strip().lower()
+    hash_obj = hashlib.sha256()
+    hash_obj.update(normalized_inst.encode("utf-8"))
+    return hash_obj.hexdigest()