lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- gui_agents/__init__.py +67 -0
- gui_agents/agents/Backend/ADBBackend.py +62 -0
- gui_agents/agents/Backend/Backend.py +28 -0
- gui_agents/agents/Backend/LybicBackend.py +355 -0
- gui_agents/agents/Backend/PyAutoGUIBackend.py +186 -0
- gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py +250 -0
- gui_agents/agents/Backend/__init__.py +0 -0
- gui_agents/agents/hardware_interface.py +4 -2
- gui_agents/lybic_client/__init__.py +0 -0
- gui_agents/lybic_client/lybic_client.py +88 -0
- gui_agents/prompts/__init__.py +0 -0
- gui_agents/prompts/prompts.py +869 -0
- gui_agents/service/__init__.py +19 -0
- gui_agents/service/agent_service.py +527 -0
- gui_agents/service/api_models.py +136 -0
- gui_agents/service/config.py +241 -0
- gui_agents/service/exceptions.py +35 -0
- gui_agents/store/__init__.py +0 -0
- gui_agents/store/registry.py +22 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.0.dist-info}/METADATA +69 -4
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.0.dist-info}/RECORD +24 -7
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.0.dist-info}/WHEEL +0 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# 1) Desktop automation backend (PyAutoGUI code, optionally run in a VMware VM via DesktopEnv)
|
|
3
|
+
# ---------------------------------------------------------------------------
|
|
4
|
+
import os
|
|
5
|
+
import io
|
|
6
|
+
from PIL import Image
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from desktop_env.desktop_env import DesktopEnv
|
|
9
|
+
from gui_agents.agents.Action import (
|
|
10
|
+
Action,
|
|
11
|
+
Click,
|
|
12
|
+
DoubleClick,
|
|
13
|
+
Move,
|
|
14
|
+
Scroll,
|
|
15
|
+
Drag,
|
|
16
|
+
TypeText,
|
|
17
|
+
Hotkey,
|
|
18
|
+
Wait,
|
|
19
|
+
Done,
|
|
20
|
+
Failed,
|
|
21
|
+
Screenshot
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from gui_agents.agents.Backend.Backend import Backend
|
|
25
|
+
import time
|
|
26
|
+
|
|
27
|
+
def screenshot_bytes_to_pil_image(screenshot_bytes: bytes) -> Optional[Image.Image]:
    """Decode raw screenshot bytes (e.g. ``obs["screenshot"]``) into a PIL image.

    The image is returned exactly as ``Image.open`` produces it; no resizing
    or conversion is applied.

    Args:
        screenshot_bytes: The encoded image bytes of the screenshot.

    Returns:
        The decoded PIL ``Image``.  The ``Optional`` in the signature is kept
        for backward compatibility; this function never returns ``None``.

    Raises:
        RuntimeError: If the bytes cannot be opened as an image.  The
            underlying exception is chained as ``__cause__``.
    """
    try:
        # Wrap the bytes in an in-memory file so PIL can read them directly.
        return Image.open(io.BytesIO(screenshot_bytes))
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(f"Failed to convert screenshot bytes to PIL Image: {e}") from e
|
|
43
|
+
|
|
44
|
+
class PyAutoGUIVMwareBackend(Backend):
    """VMware desktop backend that renders actions as *pyautogui* code.

    Every supported ``Action`` is translated into a one-line pyautogui
    snippet (statements joined with ``"; "``).  What happens to the snippet
    depends on the ``USE_PRECREATE_VM`` environment variable, which is read
    once in ``__init__``:

    * unset -- ``execute`` returns the snippet to the caller;
    * ``"Ubuntu"`` / ``"Windows"`` -- a ``DesktopEnv`` using the ``vmware``
      provider is created and reset, and ``execute`` passes the snippet to
      ``env.step``.

    Requires ``pyautogui`` (imported lazily in ``__init__``) and
    ``desktop_env``.
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Done, Failed, Screenshot}

    # Numeric button codes used by the action objects -> pyautogui button names.
    # Any other value falls back to pyautogui's 'primary' button.
    _BUTTON_NAMES = {1: "left", 4: "middle", 2: "right"}

    # Accepted USE_PRECREATE_VM values -> path of the corresponding .vmx file.
    _VM_PATHS = {
        "Ubuntu": os.path.join("vmware_vm_data", "Ubuntu-x86", "Ubuntu.vmx"),
        "Windows": os.path.join("vmware_vm_data", "Windows-x86", "Windows 10 x64.vmx"),
    }

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Set up pyautogui and, if requested via the environment, the VM.

        Args:
            default_move_duration: ``duration`` value written into generated
                ``pyautogui.click`` calls.
            platform: Stored on the instance as ``self.platform``; not used
                elsewhere in this class.

        Raises:
            ValueError: If ``USE_PRECREATE_VM`` is set to anything other than
                ``"Ubuntu"`` or ``"Windows"``.
        """
        import pyautogui as pag  # local import to avoid hard requirement

        # PyAutoGUI sometimes throws exceptions if the mouse is moved to a
        # corner, so its fail-safe is switched off.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        self.platform = platform
        self.use_precreate_vm = os.getenv("USE_PRECREATE_VM")
        if self.use_precreate_vm is None:
            # No VM requested: execute() will only return code strings.
            return

        path_to_vm = self._VM_PATHS.get(self.use_precreate_vm)
        if path_to_vm is None:
            raise ValueError(f"USE_PRECREATE_VM={self.use_precreate_vm} is not supported. Please use Ubuntu or Windows.")

        self.env = DesktopEnv(
            path_to_vm=path_to_vm,
            provider_name="vmware",
            os_type=self.use_precreate_vm,
            action_space="pyautogui",
            require_a11y_tree=False
        )
        self.env.reset()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> str:
        """Translate *action* to pyautogui code and return or run it.

        Args:
            action: The action to perform; its type must be in ``_supported``.

        Returns:
            * For ``Screenshot``: whatever ``_screenshot`` returns.
            * When ``USE_PRECREATE_VM`` is unset: the generated code string
              (or the literal ``"WAIT"`` / ``"DONE"`` / ``"FAIL"``).
            * When a VM is configured: ``None``; the code is handed to
              ``self.env.step`` instead.

        Raises:
            NotImplementedError: If the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIVMwareBackend")

        # Single dispatch chain shared by both modes; the isinstance order is
        # kept as-is in case action types are related by inheritance.
        if isinstance(action, Click):
            action_pyautogui_code = self._click(action)
        elif isinstance(action, DoubleClick):
            action_pyautogui_code = self._doubleClick(action)
        elif isinstance(action, Move):
            action_pyautogui_code = self._move(action)
        elif isinstance(action, Scroll):
            action_pyautogui_code = self._scroll(action)
        elif isinstance(action, Drag):
            action_pyautogui_code = self._drag(action)
        elif isinstance(action, TypeText):
            action_pyautogui_code = self._type(action)
        elif isinstance(action, Hotkey):
            action_pyautogui_code = self._hotkey(action)
        elif isinstance(action, Screenshot):
            # Screenshots are returned directly in both modes.
            return self._screenshot()  # type: ignore
        elif isinstance(action, Wait):
            action_pyautogui_code = "WAIT"
        elif isinstance(action, Done):
            action_pyautogui_code = "DONE"
        elif isinstance(action, Failed):
            action_pyautogui_code = "FAIL"
        else:
            # This shouldn't happen due to supports() check, but be safe.
            raise NotImplementedError(f"Unhandled action: {action}")

        if self.use_precreate_vm is None:
            # No VM configured: hand the generated code back to the caller.
            return action_pyautogui_code

        # VM configured: run the code inside the DesktopEnv; nothing is returned.
        self.env.step(action_pyautogui_code)

    # ----- individual helpers ------------------------------------------------
    def _click_code(self, act, clicks: int) -> str:
        """Build the key-down / click / key-up snippet shared by both click kinds."""
        button_str = self._BUTTON_NAMES.get(act.button, "primary")
        hold_keys = act.holdKey or []
        code_parts = []
        for k in hold_keys:
            code_parts.append(f"pyautogui.keyDown('{k}')")
            code_parts.append("time.sleep(0.05)")
        code_parts.append(
            f"pyautogui.click(x={act.x}, y={act.y}, clicks={clicks}, button='{button_str}', "
            f"duration={self.default_move_duration}, interval=0.5)"
        )
        for k in hold_keys:
            code_parts.append(f"pyautogui.keyUp('{k}')")
        return "; ".join(code_parts)

    def _click(self, act: Click) -> str:
        """Return code for a single click at ``(act.x, act.y)`` with optional held keys."""
        return self._click_code(act, clicks=1)

    def _doubleClick(self, act: DoubleClick) -> str:
        """Return code for a double click at ``(act.x, act.y)`` with optional held keys."""
        return self._click_code(act, clicks=2)

    def _move(self, act: Move) -> str:
        """Return code that moves the pointer to ``(act.x, act.y)`` with optional held keys."""
        hold_keys = act.holdKey or []
        code_parts = []
        for k in hold_keys:
            code_parts.append(f"pyautogui.keyDown('{k}')")
            code_parts.append("time.sleep(0.05)")
        code_parts.append(f"pyautogui.moveTo(x = {act.x}, y = {act.y})")
        for k in hold_keys:
            code_parts.append(f"pyautogui.keyUp('{k}')")
        return "; ".join(code_parts)

    def _scroll(self, act: Scroll) -> str:
        """Return code that moves to ``(act.x, act.y)`` and scrolls.

        A vertical step takes precedence: the horizontal step is only used
        when ``stepVertical`` is ``None``.  With both unset only the move is
        emitted.
        """
        code_parts = [f"pyautogui.moveTo(x = {act.x}, y = {act.y})"]
        if act.stepVertical is not None:
            code_parts.append(f"pyautogui.vscroll({act.stepVertical})")
        elif act.stepHorizontal is not None:
            code_parts.append(f"pyautogui.hscroll({act.stepHorizontal})")
        return "; ".join(code_parts)

    def _drag(self, act: Drag) -> str:
        """Return code for a left-button drag from the start to the end point."""
        hold_keys = act.holdKey or []
        code_parts = []
        for k in hold_keys:
            code_parts.append(f"pyautogui.keyDown('{k}')")
            code_parts.append("time.sleep(0.05)")

        # Short pauses between the phases are part of the generated script.
        code_parts.append(f"pyautogui.moveTo(x = {act.startX}, y = {act.startY})")
        code_parts.append("time.sleep(0.1)")

        code_parts.append("pyautogui.mouseDown(button='left')")
        code_parts.append("time.sleep(0.2)")

        code_parts.append(f"pyautogui.moveTo(x = {act.endX}, y = {act.endY}, duration=0.5)")
        code_parts.append("time.sleep(0.1)")

        code_parts.append("pyautogui.mouseUp(button='left')")

        for k in hold_keys:
            code_parts.append(f"pyautogui.keyUp('{k}')")
        return "; ".join(code_parts)

    def _type(self, act: TypeText) -> str:
        """Return code that types ``act.text``.

        The text is emitted with ``repr`` so quotes, backslashes and newlines
        yield a valid Python string literal instead of breaking (or injecting
        into) the generated code.
        """
        return f"pyautogui.write({str(act.text)!r})"

    def _hotkey(self, act: Hotkey) -> str:
        """Return code that presses the key combination ``act.keys``.

        With ``act.duration`` set (milliseconds), keys are pressed in order,
        each followed by a sleep of that duration, then released in reverse
        order.  Otherwise a single ``pyautogui.hotkey`` call is emitted.
        """
        # One guarded list for every branch; a missing key list counts as empty.
        keys = act.keys or []
        code_parts = []
        if act.duration is not None:
            for k in keys:
                code_parts.append(f"pyautogui.keyDown('{k}')")
                code_parts.append(f"time.sleep({act.duration} * 1e-3)")
            for k in reversed(keys):
                code_parts.append(f"pyautogui.keyUp('{k}')")
        else:
            keys_str = "', '".join(keys)
            code_parts.append(f"pyautogui.hotkey('{keys_str}', interval=0.1)")
        return "; ".join(code_parts)

    def _screenshot(self) -> str | Image.Image | None:
        """Return a screenshot, or code for one when no VM is configured.

        Without a VM this returns a pyautogui code string; with a VM it
        decodes ``obs["screenshot"]`` from the environment into a PIL image.
        """
        if self.use_precreate_vm is None:
            return "screenshot = pyautogui.screenshot(); return screenshot"
        obs = self.env._get_obs()
        return screenshot_bytes_to_pil_image(obs["screenshot"])
|
|
File without changes
|
|
@@ -16,8 +16,8 @@ use to perform UI operations. It is deliberately thin:
|
|
|
16
16
|
into platform‑specific calls (PyAutoGUI, ADB, Lybic cloud device, …).
|
|
17
17
|
* Performs minimal capability checks + error propagation.
|
|
18
18
|
|
|
19
|
-
The default backend implemented here is **PyAutoGUIBackend**.
|
|
20
|
-
**ADBBackend** and **
|
|
19
|
+
The default backend implemented here is **PyAutoGUIBackend**.
|
|
20
|
+
Available backends: **ADBBackend**, **LybicBackend**, and **PyAutoGUIVMwareBackend**.
|
|
21
21
|
|
|
22
22
|
--------------------------------------------------------------------------
|
|
23
23
|
Quick usage
|
|
@@ -27,6 +27,8 @@ from actions import Click
|
|
|
27
27
|
from hardware_interface import HardwareInterface
|
|
28
28
|
|
|
29
29
|
hwi = HardwareInterface(backend="pyautogui")
|
|
30
|
+
# Or use Lybic SDK backend
|
|
31
|
+
hwi_lybic = HardwareInterface(backend="lybic_sdk")
|
|
30
32
|
|
|
31
33
|
# Single action
|
|
32
34
|
hwi.dispatch(Click(xy=(960, 540)))
|
|
File without changes
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
from typing import Optional, Dict, Any
|
|
3
|
+
|
|
4
|
+
class LybicClient:
    """Light-weight async wrapper for Lybic REST API.

    The client owns an ``httpx.AsyncClient``; release it with ``close()`` or
    by using the instance as an async context manager::

        async with LybicClient(api_key, base_url, org_id) as client:
            await client.create_sandbox()
    """

    # ---------- life-cycle ----------
    def __init__(self, api_key: str, base_url: str, org_id: str) -> None:
        """Create the HTTP session.

        Args:
            api_key: Sent as the ``X-Api-Key`` header on every request.
            base_url: API root; a trailing ``/`` is stripped.
            org_id: Organisation id interpolated into sandbox endpoints.
        """
        self.base = base_url.rstrip("/")
        self.org_id = org_id
        self.http = httpx.AsyncClient(
            headers={"X-Api-Key": api_key, "Content-Type": "application/json"},
            timeout=30,
        )

        # runtime cache (set by create_sandbox)
        self.sandbox: Optional[Dict[str, Any]] = None

    async def close(self) -> None:
        """Close the underlying HTTP session."""
        await self.http.aclose()

    async def __aenter__(self) -> "LybicClient":
        """Return the client itself for ``async with`` usage."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP session on leaving the ``async with`` block."""
        await self.close()

    # ---------- low-level ----------
    async def _req(self, path: str, method: str = "GET", json: Any = None):
        """Send one request and return the decoded JSON response.

        Args:
            path: Endpoint path appended to the base URL.
            method: HTTP method name.
            json: Optional JSON-serialisable request body.

        Raises:
            httpx.HTTPStatusError: Via ``raise_for_status`` on a 4xx/5xx response.
        """
        import logging  # local import keeps this block self-contained

        r = await self.http.request(method, f"{self.base}{path}", json=json)
        # Debug trace of the request; r.request is the httpx.Request object and
        # its url is the full URL (including any query string).
        req = r.request
        logging.getLogger(__name__).debug(
            "[HTTP] %s %s json=%r status=%s",
            req.method, req.url, json, r.status_code,
        )

        r.raise_for_status()
        return r.json()

    # ---------- high-level ----------
    async def create_sandbox(self, **opts) -> Dict[str, Any]:
        """
        Create a new sandbox and cache the response in ``self.sandbox``.
        Keyword arguments are sent as the JSON request body.
        Returns the full response dict.
        """
        resp = await self._req(
            f"/api/orgs/{self.org_id}/sandboxes", "POST", opts
        )

        # cache
        self.sandbox = resp
        return resp

    def _require_sandbox_id(self, sid: Optional[str]) -> str:
        """Return *sid* if given, else the cached sandbox's id.

        Raises:
            RuntimeError: If no id was passed and no sandbox is cached.
        """
        if sid:
            return sid
        if self.sandbox:
            return self.sandbox["id"]
        raise RuntimeError("No sandbox_id specified and none cached — "
                           "call create_sandbox() first.")

    async def preview(self, sid: Optional[str] = None):
        """POST to the sandbox's ``preview`` endpoint and return the response.

        `sid` optional if sandbox already cached.
        """
        sid = self._require_sandbox_id(sid)
        return await self._req(
            f"/api/orgs/{self.org_id}/sandboxes/{sid}/preview", "POST"
        )

    async def exec_action(self, action: dict, sid: Optional[str] = None):
        """
        Execute a single GUI action. `sid` optional if sandbox already cached.
        """
        sid = self._require_sandbox_id(sid)
        return await self._req(
            f"/api/orgs/{self.org_id}/sandboxes/{sid}/actions/computer-use",
            "POST",
            {"action": action},
        )

    async def parse_nl(self, text: str, model: str = "ui-tars"):
        """POST *text* and *model* to the ``computer-use/parse`` endpoint."""
        return await self._req(
            "/api/computer-use/parse",
            "POST",
            {"model": model, "textContent": text},
        )

    # ---------- helpers ----------
    @property
    def sandbox_id(self) -> Optional[str]:
        """Id of the cached sandbox, or ``None`` when none is cached."""
        return self.sandbox["id"] if self.sandbox else None
|
|
88
|
+
|
|
File without changes
|