lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

@@ -0,0 +1,250 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 1) Desktop automation backend (PyAutoGUI)
3
+ # ---------------------------------------------------------------------------
4
+ import os
5
+ import io
6
+ from PIL import Image
7
+ from typing import Optional
8
+ from desktop_env.desktop_env import DesktopEnv
9
+ from gui_agents.agents.Action import (
10
+ Action,
11
+ Click,
12
+ DoubleClick,
13
+ Move,
14
+ Scroll,
15
+ Drag,
16
+ TypeText,
17
+ Hotkey,
18
+ Wait,
19
+ Done,
20
+ Failed,
21
+ Screenshot
22
+ )
23
+
24
+ from gui_agents.agents.Backend.Backend import Backend
25
+ import time
26
+
27
def screenshot_bytes_to_pil_image(screenshot_bytes: bytes) -> Image.Image:
    """
    Convert the bytes data of obs["screenshot"] to a PIL Image object.

    The bytes are wrapped in an in-memory buffer and handed to ``Image.open``;
    no resizing or mode conversion is applied here.

    Args:
        screenshot_bytes: The encoded bytes data of the screenshot

    Returns:
        PIL Image object

    Raises:
        RuntimeError: if the bytes cannot be opened as an image. The original
            exception is attached as ``__cause__`` so the root cause stays
            visible in the traceback.
    """
    try:
        # Create PIL Image object directly from bytes
        return Image.open(io.BytesIO(screenshot_bytes))
    except Exception as e:
        # This function never returns None on failure: every error is
        # surfaced to the caller as a RuntimeError.
        raise RuntimeError(f"Failed to convert screenshot bytes to PIL Image: {e}") from e
43
+
44
class PyAutoGUIVMwareBackend(Backend):
    """VMware desktop backend powered by *pyautogui*.

    Each supported action is translated into a one-line snippet of pyautogui
    source code (individual statements joined with "; ").  What happens to the
    snippet depends on the ``USE_PRECREATE_VM`` environment variable, which is
    read once in ``__init__``:

    * unset               -> ``execute`` returns the snippet to the caller.
    * "Ubuntu"/"Windows"  -> a ``DesktopEnv`` is created for the matching
      pre-created VM and ``execute`` forwards the snippet to ``env.step``.

    Pros  : zero dependency besides Python & pyautogui.
    Cons  : Requires an active, visible desktop session (won't work headless).
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Done, Failed, Screenshot}

    # Path components (relative to the working directory) of the .vmx file
    # for each accepted USE_PRECREATE_VM value.
    _VM_PATHS = {
        "Ubuntu": ("vmware_vm_data", "Ubuntu-x86", "Ubuntu.vmx"),
        "Windows": ("vmware_vm_data", "Windows-x86", "Windows 10 x64.vmx"),
    }

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Set up pyautogui and, if requested via the environment, the VM.

        Args:
            default_move_duration: value emitted as ``duration=`` in generated
                ``pyautogui.click`` calls.
            platform: stored on the instance; not otherwise used in this class.

        Raises:
            ValueError: if ``USE_PRECREATE_VM`` is set to anything other than
                "Ubuntu" or "Windows".
        """
        import pyautogui as pag  # local import to avoid hard requirement

        # PyAutoGUI sometimes throws exceptions if mouse is moved to a corner,
        # so its fail-safe is switched off.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        self.platform = platform
        self.use_precreate_vm = os.getenv("USE_PRECREATE_VM")

        # NOTE: self.env only exists when USE_PRECREATE_VM is set.
        if self.use_precreate_vm is not None:
            vm_path_parts = self._VM_PATHS.get(self.use_precreate_vm)
            if vm_path_parts is None:
                raise ValueError(f"USE_PRECREATE_VM={self.use_precreate_vm} is not supported. Please use Ubuntu or Windows.")

            self.env = DesktopEnv(
                path_to_vm=os.path.join(*vm_path_parts),
                provider_name="vmware",
                os_type=self.use_precreate_vm,
                action_space="pyautogui",
                require_a11y_tree=False
            )
            self.env.reset()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> str:
        """Translate *action* to pyautogui code and return or run it.

        Returns:
            * ``Screenshot``: whatever ``_screenshot`` returns (a code string
              without a VM, a PIL image with one).
            * USE_PRECREATE_VM unset: the generated code string, or the
              literal "WAIT" / "DONE" / "FAIL" for those actions.
            * USE_PRECREATE_VM set: ``None``; the code is passed to
              ``self.env.step`` instead of being returned.

        Raises:
            NotImplementedError: if the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIVMwareBackend")

        # Checked in this order with isinstance, first match wins.
        code_builders = (
            (Click, self._click),
            (DoubleClick, self._doubleClick),
            (Move, self._move),
            (Scroll, self._scroll),
            (Drag, self._drag),
            (TypeText, self._type),
            (Hotkey, self._hotkey),
        )
        for action_type, build in code_builders:
            if isinstance(action, action_type):
                action_pyautogui_code = build(action)
                break
        else:
            if isinstance(action, Screenshot):
                # Screenshots are returned directly in both modes and are
                # never forwarded to env.step.
                return self._screenshot()  # type: ignore
            elif isinstance(action, Wait):
                action_pyautogui_code = "WAIT"
            elif isinstance(action, Done):
                action_pyautogui_code = "DONE"
            elif isinstance(action, Failed):
                action_pyautogui_code = "FAIL"
            else:
                # This shouldn't happen due to supports() check, but be safe.
                raise NotImplementedError(f"Unhandled action: {action}")

        if self.use_precreate_vm is None:
            # No VM configured: hand the snippet back to the caller.
            return action_pyautogui_code

        # VM configured: run the snippet inside the VM.  Nothing is returned
        # in this mode (kept as-is so existing callers see no change).
        self.env.step(action_pyautogui_code)
        return None  # type: ignore[return-value]

    # ----- individual helpers ------------------------------------------------
    @staticmethod
    def _button_name(button) -> str:
        """Map a numeric button code to a pyautogui button name.

        1 -> "left", 4 -> "middle", 2 -> "right"; anything else -> "primary".
        """
        if button == 1:
            return "left"
        if button == 4:
            return "middle"
        if button == 2:
            return "right"
        return "primary"

    @staticmethod
    def _with_held_keys(hold_keys, body_parts) -> str:
        """Surround *body_parts* with keyDown/keyUp statements for *hold_keys*.

        Each key is pressed (followed by a 50 ms sleep) before the body and
        released, in the same order, after it.  ``None`` means no keys.
        """
        keys = hold_keys or []
        parts = []
        for k in keys:
            parts.append(f"pyautogui.keyDown('{k}')")
            parts.append("time.sleep(0.05)")
        parts.extend(body_parts)
        parts.extend(f"pyautogui.keyUp('{k}')" for k in keys)
        return "; ".join(parts)

    def _click_code(self, act, clicks: int) -> str:
        """Build the snippet shared by single and double clicks."""
        button_str = self._button_name(act.button)
        click_stmt = (
            f"pyautogui.click(x={act.x}, y={act.y}, clicks={clicks}, button='{button_str}', "
            f"duration={self.default_move_duration}, interval=0.5)"
        )
        return self._with_held_keys(act.holdKey, [click_stmt])

    def _click(self, act: Click) -> str:
        """Return code for a single click at (act.x, act.y)."""
        return self._click_code(act, 1)

    def _doubleClick(self, act: DoubleClick) -> str:
        """Return code for a double click at (act.x, act.y)."""
        return self._click_code(act, 2)

    def _move(self, act: Move) -> str:
        """Return code that moves the pointer to (act.x, act.y)."""
        return self._with_held_keys(
            act.holdKey,
            [f"pyautogui.moveTo(x = {act.x}, y = {act.y})"],
        )

    def _scroll(self, act: Scroll) -> str:
        """Return code that moves to (act.x, act.y) and then scrolls.

        A vertical step takes precedence: when ``stepVertical`` is set, any
        ``stepHorizontal`` is ignored.  With neither set only the move is
        emitted.
        """
        code_parts = [f"pyautogui.moveTo(x = {act.x}, y = {act.y})"]
        if act.stepVertical is not None:
            code_parts.append(f"pyautogui.vscroll({act.stepVertical})")
        elif act.stepHorizontal is not None:
            code_parts.append(f"pyautogui.hscroll({act.stepHorizontal})")
        return "; ".join(code_parts)

    def _drag(self, act: Drag) -> str:
        """Return code for a left-button drag from the start to the end point.

        The short sleeps between the move / press / move / release steps are
        part of the generated code.
        """
        drag_steps = [
            f"pyautogui.moveTo(x = {act.startX}, y = {act.startY})",
            "time.sleep(0.1)",
            "pyautogui.mouseDown(button='left')",
            "time.sleep(0.2)",
            f"pyautogui.moveTo(x = {act.endX}, y = {act.endY}, duration=0.5)",
            "time.sleep(0.1)",
            "pyautogui.mouseUp(button='left')",
        ]
        return self._with_held_keys(act.holdKey, drag_steps)

    def _type(self, act: TypeText) -> str:
        """Return code that types ``act.text``.

        The text is embedded with ``repr`` so quotes, backslashes and newlines
        in it yield a valid Python string literal instead of breaking (or
        injecting into) the generated code.
        """
        return f"pyautogui.write({act.text!r})"

    def _hotkey(self, act: Hotkey) -> str:
        """Return code that presses the key combination ``act.keys``.

        With ``act.duration`` set, the keys are pressed in order, held for
        ``duration * 1e-3`` seconds (i.e. duration is treated as
        milliseconds), then released in reverse order.  Otherwise a single
        ``pyautogui.hotkey`` call is emitted.
        """
        # Normalise once so a missing key list behaves like an empty one in
        # every branch.
        keys = act.keys or []
        if act.duration is not None:
            code_parts = [f"pyautogui.keyDown('{k}')" for k in keys]
            code_parts.append(f"time.sleep({act.duration} * 1e-3)")
            code_parts.extend(f"pyautogui.keyUp('{k}')" for k in reversed(keys))
            return "; ".join(code_parts)

        keys_str = "', '".join(keys)
        return f"pyautogui.hotkey('{keys_str}', interval=0.1)"

    def _screenshot(self) -> str:
        """Return a screenshot, or the code to take one.

        Without a VM this returns a code string; with a VM it returns the PIL
        image decoded from the environment's current observation (so the
        ``str`` annotation only describes the no-VM case).
        """
        if self.use_precreate_vm is None:
            return "screenshot = pyautogui.screenshot(); return screenshot"
        obs = self.env._get_obs()
        return screenshot_bytes_to_pil_image(obs["screenshot"])
File without changes
@@ -16,8 +16,8 @@ use to perform UI operations. It is deliberately thin:
16
16
  into platform‑specific calls (PyAutoGUI, ADB, Lybic cloud device, …).
17
17
  * Performs minimal capability checks + error propagation.
18
18
 
19
- The default backend implemented here is **PyAutoGUIBackend**. Stubs for
20
- **ADBBackend** and **LybicBackend** show how to extend the system.
19
+ The default backend implemented here is **PyAutoGUIBackend**.
20
+ Available backends: **ADBBackend**, **LybicBackend**, and **PyAutoGUIVMwareBackend**.
21
21
 
22
22
  --------------------------------------------------------------------------
23
23
  Quick usage
@@ -27,6 +27,8 @@ from actions import Click
27
27
  from hardware_interface import HardwareInterface
28
28
 
29
29
  hwi = HardwareInterface(backend="pyautogui")
30
+ # Or use Lybic SDK backend
31
+ hwi_lybic = HardwareInterface(backend="lybic_sdk")
30
32
 
31
33
  # Single action
32
34
  hwi.dispatch(Click(xy=(960, 540)))
File without changes
@@ -0,0 +1,88 @@
1
+ import httpx
2
+ from typing import Optional, Dict, Any
3
+
4
class LybicClient:
    """Light-weight async wrapper for Lybic REST API.

    The client owns an ``httpx.AsyncClient``.  Release it either by calling
    ``await client.close()`` or by using the client as an async context
    manager::

        async with LybicClient(api_key, base_url, org_id) as client:
            await client.create_sandbox()
    """

    # ---------- life-cycle ----------
    def __init__(self, api_key: str, base_url: str, org_id: str) -> None:
        """Store connection settings and create the underlying HTTP client.

        Args:
            api_key: sent as the ``X-Api-Key`` header on every request.
            base_url: API root; a trailing "/" is stripped.
            org_id: organisation id interpolated into request paths.
        """
        self.base = base_url.rstrip("/")
        self.org_id = org_id
        self.http = httpx.AsyncClient(
            headers={"X-Api-Key": api_key, "Content-Type": "application/json"},
            timeout=30,
        )

        # runtime cache (set by create_sandbox)
        self.sandbox: Optional[Dict[str, Any]] = None
        # self.connect_details: Optional[Dict[str, Any]] = None

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self.http.aclose()

    async def __aenter__(self) -> "LybicClient":
        """Enter ``async with``: return the client itself."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Leave ``async with``: always close the HTTP client.

        Returns None, so an exception raised inside the block propagates.
        """
        await self.close()

    # ---------- low-level ----------
    async def _req(self, path: str, method: str = "GET", json: Any = None):
        """Send one request to ``base + path`` and return the decoded JSON body.

        Raises whatever ``raise_for_status`` raises for an error status.
        """
        r = await self.http.request(method, f"{self.base}{path}", json=json)
        # Print debugging information
        req = r.request  # httpx.Request object
        print(
            "[HTTP]", req.method, req.url,  # full URL (including query string)
            "json=", json,
            "status=", r.status_code,
        )

        r.raise_for_status()
        return r.json()

    # ---------- high-level ----------
    async def create_sandbox(self, **opts) -> Dict[str, Any]:
        """
        Create a new sandbox and cache the response on ``self.sandbox``.
        Keyword arguments are sent as the JSON request body.
        Returns the full response dict.
        """
        resp = await self._req(
            f"/api/orgs/{self.org_id}/sandboxes", "POST", opts or {}
        )

        # cache
        self.sandbox = resp
        # self.connect_details = resp.get("connectDetails")
        return resp

    def _require_sandbox_id(self, sid: Optional[str]) -> str:
        """Return *sid* if given, else the cached sandbox's id.

        Raises:
            RuntimeError: if *sid* is empty and no sandbox has been cached.
        """
        if sid:
            return sid
        if self.sandbox:
            return self.sandbox["id"]
        raise RuntimeError("No sandbox_id specified and none cached — "
                           "call create_sandbox() first.")

    async def preview(self, sid: Optional[str] = None):
        """POST to the sandbox's ``/preview`` endpoint and return the response.

        `sid` optional if sandbox already cached.
        """
        sid = self._require_sandbox_id(sid)
        return await self._req(
            f"/api/orgs/{self.org_id}/sandboxes/{sid}/preview", "POST"
        )

    async def exec_action(self, action: dict, sid: Optional[str] = None):
        """
        Execute a single GUI action. `sid` optional if sandbox already cached.
        The action is sent wrapped as ``{"action": action}``.
        """
        sid = self._require_sandbox_id(sid)
        return await self._req(
            f"/api/orgs/{self.org_id}/sandboxes/{sid}/actions/computer-use",
            "POST",
            {"action": action},
        )

    async def parse_nl(self, text: str, model: str = "ui-tars"):
        """POST *text* and *model* to ``/api/computer-use/parse`` and return the response."""
        return await self._req(
            "/api/computer-use/parse",
            "POST",
            {"model": model, "textContent": text},
        )

    # ---------- helpers ----------
    @property
    def sandbox_id(self) -> Optional[str]:
        """Id of the cached sandbox, or None when nothing is cached."""
        return self.sandbox["id"] if self.sandbox else None
88
+
File without changes