PyPI - lybic-guiagents - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (38) hide show

gui_agents/__init__.py +63 -0
gui_agents/agents/Action.py +3 -3
gui_agents/agents/Backend/ADBBackend.py +62 -0
gui_agents/agents/Backend/Backend.py +28 -0
gui_agents/agents/Backend/LybicBackend.py +354 -0
gui_agents/agents/Backend/PyAutoGUIBackend.py +183 -0
gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py +250 -0
gui_agents/agents/Backend/__init__.py +0 -0
gui_agents/agents/agent_s.py +0 -2
gui_agents/agents/grounding.py +1 -6
gui_agents/agents/hardware_interface.py +24 -7
gui_agents/agents/manager.py +0 -3
gui_agents/agents/translator.py +1 -1
gui_agents/agents/worker.py +1 -2
gui_agents/cli_app.py +143 -8
gui_agents/core/engine.py +0 -2
gui_agents/core/knowledge.py +0 -2
gui_agents/lybic_client/__init__.py +0 -0
gui_agents/lybic_client/lybic_client.py +88 -0
gui_agents/prompts/__init__.py +0 -0
gui_agents/prompts/prompts.py +869 -0
gui_agents/service/__init__.py +19 -0
gui_agents/service/agent_service.py +527 -0
gui_agents/service/api_models.py +136 -0
gui_agents/service/config.py +241 -0
gui_agents/service/exceptions.py +35 -0
gui_agents/store/__init__.py +0 -0
gui_agents/store/registry.py +22 -0
gui_agents/tools/tools.py +0 -4
gui_agents/unit_test/test_manager.py +0 -2
gui_agents/unit_test/test_worker.py +0 -2
gui_agents/utils/analyze_display.py +1 -1
gui_agents/utils/common_utils.py +0 -2
{lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/METADATA +203 -75
{lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/RECORD +38 -21
{lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/WHEEL +0 -0
{lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/licenses/LICENSE +0 -0
{lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/top_level.txt +0 -0

gui_agents/agents/Backend/PyAutoGUIBackend.py ADDED Viewed

@@ -0,0 +1,183 @@
+# ---------------------------------------------------------------------------
+# 1) Desktop automation backend (PyAutoGUI)
+# ---------------------------------------------------------------------------
+import subprocess
+import sys
+import pyperclip
+from gui_agents.agents.Action import (
+    Action,
+    Click,
+    DoubleClick,
+    Move,
+    Scroll,
+    Drag,
+    TypeText,
+    Hotkey,
+    Wait,
+    Screenshot
+)
+from gui_agents.agents.Backend.Backend import Backend
+import time
+class PyAutoGUIBackend(Backend):
+    """Pure local desktop backend powered by *pyautogui*.
+    Pros  : zero dependency besides Python & pyautogui.
+    Cons  : Requires an active, visible desktop session (won't work headless).
+    """
+    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Screenshot}
+    # ¶ PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
+    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
+        import pyautogui as pag  # local import to avoid hard requirement
+        pag.FAILSAFE = False
+        self.pag = pag
+        self.default_move_duration = default_move_duration
+        # ↙️ Critical patch: save platform identifier
+        self.platform = (platform or sys.platform).lower()
+    # ------------------------------------------------------------------
+    def execute(self, action: Action) -> None:
+        if not self.supports(type(action)):
+            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIBackend")
+        if isinstance(action, Click):
+            self._click(action)
+        elif isinstance(action, DoubleClick):
+            self._doubleClick(action)
+        elif isinstance(action, Move):
+            self._move(action)
+        elif isinstance(action, Scroll):
+            self._scroll(action)
+        elif isinstance(action, Drag):
+            self._drag(action)
+        elif isinstance(action, TypeText):
+            self._type(action)
+        elif isinstance(action, Hotkey):
+            self._hotkey(action)
+        elif isinstance(action, Screenshot):
+            screenshot = self._screenshot()
+            return screenshot # type: ignore
+        elif isinstance(action, Wait):
+            time.sleep(action.duration * 1e-3)
+        else:
+            # This shouldn't happen due to supports() check, but be safe.
+            raise NotImplementedError(f"Unhandled action: {action}")
+    # ----- individual helpers ------------------------------------------------
+    def _click(self, act: Click) -> None:
+        for k in act.holdKey or []:
+            self.pag.keyDown(k)
+            time.sleep(0.05)
+        button_str = 'primary'
+        if act.button == 1:
+            button_str = "left"
+        elif act.button == 4:
+            button_str = "middle"
+        elif act.button == 2:
+            button_str = "right"
+        self.pag.click(
+            x=act.x,
+            y=act.y,
+            clicks=1,
+            button=button_str, # type: ignore
+            duration=self.default_move_duration,
+            interval=0.5,
+        )
+        for k in act.holdKey or []:
+            self.pag.keyUp(k)
+    def _doubleClick(self, act: DoubleClick) -> None:
+        for k in act.holdKey or []:
+            self.pag.keyDown(k)
+            time.sleep(0.05)
+        button_str = 'primary'
+        if act.button == 1:
+            button_str = "left"
+        elif act.button == 4:
+            button_str = "middle"
+        elif act.button == 2:
+            button_str = "right"
+        self.pag.click(
+            x=act.x,
+            y=act.y,
+            clicks=2,
+            button=button_str,
+            duration=self.default_move_duration,
+            interval=0.5,
+        )
+        for k in act.holdKey or []:
+            self.pag.keyUp(k)
+    def _move(self, act: Move) -> None:
+        for k in act.holdKey or []:
+            self.pag.keyDown(k)
+            time.sleep(0.05)
+        self.pag.moveTo(x = act.x, y = act.y)
+        for k in act.holdKey or []:
+            self.pag.keyUp(k)
+    def _scroll(self, act: Scroll) -> None:
+        self.pag.moveTo(x = act.x, y = act.y)
+        if act.stepVertical is None:
+            if act.stepHorizontal is not None:
+                self.pag.hscroll(act.stepHorizontal)
+        else:
+            self.pag.vscroll(act.stepVertical)
+    def _drag(self, act: Drag) -> None:
+        for k in act.holdKey or []:
+            self.pag.keyDown(k)
+            time.sleep(0.05)
+        self.pag.moveTo(x=act.startX, y=act.startY)
+        time.sleep(0.1)
+        self.pag.mouseDown(button='left')
+        time.sleep(0.2)
+        self.pag.moveTo(x=act.endX, y=act.endY, duration=0.5)
+        time.sleep(0.1)
+        self.pag.mouseUp(button='left')
+        for k in act.holdKey or []:
+            self.pag.keyUp(k)
+    def _type(self, act: TypeText) -> None:
+        # ------- Paste Chinese / any text --------------------------------
+        pyperclip.copy(act.text)
+        time.sleep(0.05)  # let clipboard stabilize
+        if self.platform.startswith("darwin"):
+            # self.pag.hotkey("commandright", "v", interval=0.05)
+            # # 1. Press Command key
+            subprocess.run([
+                "osascript", "-e",
+                'tell application "System Events" to keystroke "v" using command down'
+            ])
+        else:                               # Windows / Linux
+            self.pag.hotkey("ctrl", "v", interval=0.05)
+    def _hotkey(self, act: Hotkey) -> None:
+        # self.pag.hotkey(*act.keys, interval=0.1)
+        if act.duration is not None:
+            for k in act.keys or []:
+                self.pag.keyDown(k)
+                time.sleep(act.duration * 1e-3)
+            # time.sleep(act.duration * 1e-3)
+            for k in reversed(act.keys):
+                self.pag.keyUp(k)
+        else:
+            self.pag.hotkey(*act.keys, interval=0.1)
+    def _screenshot(self):
+        screenshot = self.pag.screenshot()
+        return screenshot

gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py ADDED Viewed

@@ -0,0 +1,250 @@
+# ---------------------------------------------------------------------------
+# 1) Desktop automation backend (PyAutoGUI)
+# ---------------------------------------------------------------------------
+import os
+import io
+from PIL import Image
+from typing import Optional
+from desktop_env.desktop_env import DesktopEnv
+from gui_agents.agents.Action import (
+    Action,
+    Click,
+    DoubleClick,
+    Move,
+    Scroll,
+    Drag,
+    TypeText,
+    Hotkey,
+    Wait,
+    Done,
+    Failed,
+    Screenshot
+)
+from gui_agents.agents.Backend.Backend import Backend
+import time
+def screenshot_bytes_to_pil_image(screenshot_bytes: bytes) -> Optional[Image.Image]:
+    """
+    Convert the bytes data of obs["screenshot"] to a PIL Image object, preserving the original size
+    Args:
+        screenshot_bytes: The bytes data of the screenshot
+    Returns:
+        PIL Image object, or None if conversion fails
+    """
+    try:
+        # Create PIL Image object directly from bytes
+        image = Image.open(io.BytesIO(screenshot_bytes))
+        return image
+    except Exception as e:
+        raise RuntimeError(f"Failed to convert screenshot bytes to PIL Image: {e}")
+class PyAutoGUIVMwareBackend(Backend):
+    """VMware desktop backend powered by *pyautogui*.
+    Pros  : zero dependency besides Python & pyautogui.
+    Cons  : Requires an active, visible desktop session (won't work headless).
+    """
+    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Done, Failed, Screenshot}
+    # ¶ PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
+    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
+        import pyautogui as pag  # local import to avoid hard requirement
+        pag.FAILSAFE = False
+        self.pag = pag
+        self.default_move_duration = default_move_duration
+        self.platform = platform
+        self.use_precreate_vm = os.getenv("USE_PRECREATE_VM")
+        if self.use_precreate_vm is not None:
+            if self.use_precreate_vm == "Ubuntu":
+                path_to_vm = os.path.join("vmware_vm_data", "Ubuntu-x86", "Ubuntu.vmx")
+            elif self.use_precreate_vm == "Windows":
+                path_to_vm = os.path.join("vmware_vm_data", "Windows-x86", "Windows 10 x64.vmx")
+            else:
+                raise ValueError(f"USE_PRECREATE_VM={self.use_precreate_vm} is not supported. Please use Ubuntu or Windows.")
+            self.env = DesktopEnv(
+                path_to_vm=path_to_vm,
+                provider_name="vmware",
+                os_type=self.use_precreate_vm,
+                action_space="pyautogui",
+                require_a11y_tree=False
+            )
+            self.env.reset()
+    # ------------------------------------------------------------------
+    def execute(self, action: Action) -> str | None:
+        if not self.supports(type(action)):
+            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIBackend")
+        # For automation OSWorld evaluation
+        if self.use_precreate_vm is None:
+            if isinstance(action, Click):
+                return self._click(action)
+            elif isinstance(action, DoubleClick):
+                return self._doubleClick(action)
+            elif isinstance(action, Move):
+                return self._move(action)
+            elif isinstance(action, Scroll):
+                return self._scroll(action)
+            elif isinstance(action, Drag):
+                return self._drag(action)
+            elif isinstance(action, TypeText):
+                return self._type(action)
+            elif isinstance(action, Hotkey):
+                return self._hotkey(action)
+            elif isinstance(action, Screenshot):
+                screenshot = self._screenshot()
+                return screenshot # type: ignore
+            elif isinstance(action, Wait):
+                return f"WAIT"
+            elif isinstance(action, Done):
+                return f"DONE"
+            elif isinstance(action, Failed):
+                return f"FAIL"
+            else:
+                # This shouldn't happen due to supports() check, but be safe.
+                raise NotImplementedError(f"Unhandled action: {action}")
+        # For cli_app
+        else:
+            if isinstance(action, Click):
+                action_pyautogui_code = self._click(action)
+            elif isinstance(action, DoubleClick):
+                action_pyautogui_code = self._doubleClick(action)
+            elif isinstance(action, Move):
+                action_pyautogui_code = self._move(action)
+            elif isinstance(action, Scroll):
+                action_pyautogui_code = self._scroll(action)
+            elif isinstance(action, Drag):
+                action_pyautogui_code = self._drag(action)
+            elif isinstance(action, TypeText):
+                action_pyautogui_code = self._type(action)
+            elif isinstance(action, Hotkey):
+                action_pyautogui_code = self._hotkey(action)
+            elif isinstance(action, Screenshot):
+                screenshot = self._screenshot()
+                return screenshot # type: ignore
+            elif isinstance(action, Wait):
+                action_pyautogui_code = f"WAIT"
+            elif isinstance(action, Done):
+                action_pyautogui_code = f"DONE"
+            elif isinstance(action, Failed):
+                action_pyautogui_code = f"FAIL"
+            else:
+                # This shouldn't happen due to supports() check, but be safe.
+                raise NotImplementedError(f"Unhandled action: {action}")
+            self.env.step(action_pyautogui_code)
+    # ----- individual helpers ------------------------------------------------
+    def _click(self, act: Click) -> str:
+        button_str = 'primary'
+        if act.button == 1:
+            button_str = "left"
+        elif act.button == 4:
+            button_str = "middle"
+        elif act.button == 2:
+            button_str = "right"
+        hold_keys = act.holdKey or []
+        code_parts = []
+        for k in hold_keys:
+            code_parts.append(f"pyautogui.keyDown('{k}')")
+            code_parts.append(f"time.sleep(0.05)")
+        code_parts.append(f"pyautogui.click(x={act.x}, y={act.y}, clicks=1, button='{button_str}', duration={self.default_move_duration}, interval=0.5)")
+        for k in hold_keys:
+            code_parts.append(f"pyautogui.keyUp('{k}')")
+        return "; ".join(code_parts)
+    def _doubleClick(self, act: DoubleClick) -> str:
+        button_str = 'primary'
+        if act.button == 1:
+            button_str = "left"
+        elif act.button == 4:
+            button_str = "middle"
+        elif act.button == 2:
+            button_str = "right"
+        hold_keys = act.holdKey or []
+        code_parts = []
+        for k in hold_keys:
+            code_parts.append(f"pyautogui.keyDown('{k}')")
+            code_parts.append(f"time.sleep(0.05)")
+        code_parts.append(f"pyautogui.click(x={act.x}, y={act.y}, clicks=2, button='{button_str}', duration={self.default_move_duration}, interval=0.5)")
+        for k in hold_keys:
+            code_parts.append(f"pyautogui.keyUp('{k}')")
+        return "; ".join(code_parts)
+    def _move(self, act: Move) -> str:
+        code_parts = []
+        for k in act.holdKey or []:
+            code_parts.append(f"pyautogui.keyDown('{k}')")
+            code_parts.append(f"time.sleep(0.05)")
+        code_parts.append(f"pyautogui.moveTo(x = {act.x}, y = {act.y})")
+        for k in act.holdKey or []:
+            code_parts.append(f"pyautogui.keyUp('{k}')")
+        return "; ".join(code_parts)
+    def _scroll(self, act: Scroll) -> str:
+        code_parts = []
+        code_parts.append(f"pyautogui.moveTo(x = {act.x}, y = {act.y})")
+        if act.stepVertical is None:
+            if act.stepHorizontal is not None:
+                code_parts.append(f"pyautogui.hscroll({act.stepHorizontal})")
+        else:
+            code_parts.append(f"pyautogui.vscroll({act.stepVertical})")
+        return "; ".join(code_parts)
+    def _drag(self, act: Drag) -> str:
+        hold_keys = act.holdKey or []
+        code_parts = []
+        for k in hold_keys:
+            code_parts.append(f"pyautogui.keyDown('{k}')")
+            code_parts.append(f"time.sleep(0.05)")
+        code_parts.append(f"pyautogui.moveTo(x = {act.startX}, y = {act.startY})")
+        code_parts.append("time.sleep(0.1)")
+        code_parts.append(f"pyautogui.mouseDown(button='left')")
+        code_parts.append("time.sleep(0.2)")
+        code_parts.append(f"pyautogui.moveTo(x = {act.endX}, y = {act.endY}, duration=0.5)")
+        code_parts.append("time.sleep(0.1)")
+        code_parts.append(f"pyautogui.mouseUp(button='left')")
+        for k in hold_keys:
+            code_parts.append(f"pyautogui.keyUp('{k}')")
+        return "; ".join(code_parts)
+    def _type(self, act: TypeText) -> str:
+        code_parts = []
+        code_parts.append(f"pyautogui.write('{act.text}')")
+        return "; ".join(code_parts)
+    def _hotkey(self, act: Hotkey) -> str:
+        code_parts = []
+        if act.duration is not None:
+            for k in act.keys or []:
+                code_parts.append(f"pyautogui.keyDown('{k}')")
+                code_parts.append(f"time.sleep({act.duration} * 1e-3)")
+            for k in reversed(act.keys):
+                code_parts.append(f"pyautogui.keyUp('{k}')")
+        else:
+            keys_str = "', '".join(act.keys)
+            code_parts.append(f"pyautogui.hotkey('{keys_str}', interval=0.1)")
+        return "; ".join(code_parts)
+    def _screenshot(self) -> str:
+        if self.use_precreate_vm is None:
+            return "screenshot = pyautogui.screenshot(); return screenshot"
+        else:
+            obs = self.env._get_obs()
+            return screenshot_bytes_to_pil_image(obs["screenshot"])

gui_agents/agents/Backend/__init__.py ADDED Viewed

File without changes

gui_agents/agents/agent_s.py CHANGED Viewed

@@ -1,12 +1,10 @@
 import json
 import logging
-from math import log
 import os
 import platform
 import textwrap
 from typing import Dict, List, Optional, Tuple
-from gui_agents.agents.grounding import ACI
 from gui_agents.agents.worker import Worker
 from gui_agents.agents.manager import Manager
 from gui_agents.agents.grounding import Grounding, FastGrounding

gui_agents/agents/grounding.py CHANGED Viewed

@@ -1,13 +1,8 @@
 import ast
 import re
 import logging
-from collections import defaultdict
-from io import BytesIO
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Dict, List
 import time
-import pytesseract
-from PIL import Image
-from pytesseract import Output
 from gui_agents.tools.tools import Tools
 from gui_agents.utils.common_utils import parse_single_code_from_string

gui_agents/agents/hardware_interface.py CHANGED Viewed

@@ -1,11 +1,14 @@
 from __future__ import annotations
-import pyautogui
 from gui_agents.agents.Backend.Backend import Backend
 from gui_agents.agents.Backend.ADBBackend import ADBBackend
 from gui_agents.agents.Backend.LybicBackend import LybicBackend
-from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
-from gui_agents.agents.Backend.PyAutoGUIVMwareBackend import PyAutoGUIVMwareBackend
+try:
+    from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
+except ImportError:
+    PyAutoGUIBackend = None
+    pass
+# from gui_agents.agents.Backend.PyAutoGUIVMwareBackend import PyAutoGUIVMwareBackend
 """hardware_interface.py  ▸  Execute Action objects on real devices / emulators
 ===============================================================================
 This module is the *single entry point* that upper‑layer planners / executors
@@ -16,8 +19,8 @@ use to perform UI operations.  It is deliberately thin:
     into platform‑specific calls (PyAutoGUI, ADB, Lybic cloud device, …).
 *   Performs minimal capability checks + error propagation.
-The default backend implemented here is **PyAutoGUIBackend**.  Stubs for
-**ADBBackend** and **LybicBackend** show how to extend the system.
+The default backend implemented here is **PyAutoGUIBackend**.
+Available backends: **ADBBackend**, **LybicBackend**, and **PyAutoGUIVMwareBackend**.
 --------------------------------------------------------------------------
 Quick usage
@@ -27,6 +30,8 @@ from actions import Click
 from hardware_interface import HardwareInterface
 hwi = HardwareInterface(backend="pyautogui")
+# Or use Lybic SDK backend
+hwi_lybic = HardwareInterface(backend="lybic_sdk")
 # Single action
 hwi.dispatch(Click(xy=(960, 540)))
@@ -55,7 +60,7 @@ __all__ = [
     "PyAutoGUIBackend",
     "ADBBackend",
     "LybicBackend",
-    "PyAutoGUIVMwareBackend",
+   # "PyAutoGUIVMwareBackend",
 ]
@@ -70,8 +75,9 @@ class HardwareInterface:
         "pyautogui": PyAutoGUIBackend,
         "adb": ADBBackend,
         "lybic": LybicBackend,
-        "pyautogui_vmware": PyAutoGUIVMwareBackend,
     }
+    if PyAutoGUIBackend is not None:
+        BACKEND_MAP["pyautogui_vmware"] = PyAutoGUIBackend
     # ------------------------------------------------------------------
     def __init__(self, backend: str | Backend = "pyautogui", **backend_kwargs):
@@ -81,6 +87,17 @@ class HardwareInterface:
             key = backend.lower()
             if key not in self.BACKEND_MAP:
                 raise ValueError(f"Unsupported backend '{backend}'. Available: {list(self.BACKEND_MAP)}")
+            # For GUI backends, provide helpful error message in headless environments
+            if key in ["pyautogui", "pyautogui_vmware"]:
+                import os
+                if os.name == 'posix' and not os.environ.get('DISPLAY'):
+                    raise RuntimeError(
+                        f"Cannot create '{backend}' backend: No DISPLAY environment variable found. "
+                        f"This typically occurs in headless/containerized environments. "
+                        f"Consider using 'lybic' or 'adb' backend instead."
+                    )
             self.backend = self.BACKEND_MAP[key](**backend_kwargs)
     # ------------------------------------------------------------------

gui_agents/agents/manager.py CHANGED Viewed

@@ -4,7 +4,6 @@ from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 import platform
-from gui_agents.agents.grounding import ACI
 from gui_agents.core.knowledge import KnowledgeBase
 from gui_agents.agents.global_state import GlobalState
 from gui_agents.store.registry import Registry
@@ -15,8 +14,6 @@ from gui_agents.utils.common_utils import (
     agent_log_to_string,
 )
 from gui_agents.tools.tools import Tools
-from PIL import Image
-import io
 logger = logging.getLogger("desktopenv.agent")

gui_agents/agents/translator.py CHANGED Viewed

@@ -5,7 +5,7 @@ format strictly follows computer-use schema.
 """
 from __future__ import annotations
-import ast, json
+import ast
 from typing import List, Dict

gui_agents/agents/worker.py CHANGED Viewed

@@ -1,12 +1,11 @@
 import logging
 import re
 import textwrap
-from typing import Dict, List, Tuple
+from typing import Dict, List
 import platform
 import os
 import json
-from gui_agents.agents.grounding import ACI
 from gui_agents.core.knowledge import KnowledgeBase
 from gui_agents.utils.common_utils import (
     Node,

lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

Potentially problematic release.

lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl