lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- gui_agents/__init__.py +63 -0
- gui_agents/agents/Action.py +3 -3
- gui_agents/agents/Backend/ADBBackend.py +62 -0
- gui_agents/agents/Backend/Backend.py +28 -0
- gui_agents/agents/Backend/LybicBackend.py +354 -0
- gui_agents/agents/Backend/PyAutoGUIBackend.py +183 -0
- gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py +250 -0
- gui_agents/agents/Backend/__init__.py +0 -0
- gui_agents/agents/agent_s.py +0 -2
- gui_agents/agents/grounding.py +1 -6
- gui_agents/agents/hardware_interface.py +24 -7
- gui_agents/agents/manager.py +0 -3
- gui_agents/agents/translator.py +1 -1
- gui_agents/agents/worker.py +1 -2
- gui_agents/cli_app.py +143 -8
- gui_agents/core/engine.py +0 -2
- gui_agents/core/knowledge.py +0 -2
- gui_agents/lybic_client/__init__.py +0 -0
- gui_agents/lybic_client/lybic_client.py +88 -0
- gui_agents/prompts/__init__.py +0 -0
- gui_agents/prompts/prompts.py +869 -0
- gui_agents/service/__init__.py +19 -0
- gui_agents/service/agent_service.py +527 -0
- gui_agents/service/api_models.py +136 -0
- gui_agents/service/config.py +241 -0
- gui_agents/service/exceptions.py +35 -0
- gui_agents/store/__init__.py +0 -0
- gui_agents/store/registry.py +22 -0
- gui_agents/tools/tools.py +0 -4
- gui_agents/unit_test/test_manager.py +0 -2
- gui_agents/unit_test/test_worker.py +0 -2
- gui_agents/utils/analyze_display.py +1 -1
- gui_agents/utils/common_utils.py +0 -2
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/METADATA +203 -75
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/RECORD +38 -21
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/WHEEL +0 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# 1) Desktop automation backend (PyAutoGUI)
|
|
3
|
+
# ---------------------------------------------------------------------------
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import pyperclip
|
|
7
|
+
from gui_agents.agents.Action import (
|
|
8
|
+
Action,
|
|
9
|
+
Click,
|
|
10
|
+
DoubleClick,
|
|
11
|
+
Move,
|
|
12
|
+
Scroll,
|
|
13
|
+
Drag,
|
|
14
|
+
TypeText,
|
|
15
|
+
Hotkey,
|
|
16
|
+
Wait,
|
|
17
|
+
Screenshot
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from gui_agents.agents.Backend.Backend import Backend
|
|
21
|
+
import time
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class PyAutoGUIBackend(Backend):
    """Pure local desktop backend powered by *pyautogui*.

    Pros : zero dependency besides Python & pyautogui.
    Cons : Requires an active, visible desktop session (won't work headless).

    Keys pressed for an action (``holdKey`` modifiers, timed hotkeys) and the
    mouse button pressed during a drag are always released again, even when
    the wrapped pyautogui call raises.
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Screenshot}

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Create the backend.

        Args:
            default_move_duration: Value passed as ``duration`` to
                ``pyautogui.click`` for single and double clicks.
            platform: Platform identifier; defaults to ``sys.platform``.
                Stored lower-cased and used by ``_type`` to choose the
                paste shortcut.
        """
        import pyautogui as pag  # local import to avoid hard requirement

        # PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        # Critical patch: save platform identifier
        self.platform = (platform or sys.platform).lower()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> object | None:
        """Perform *action* on the local desktop.

        Returns:
            The object returned by ``pyautogui.screenshot()`` for a
            ``Screenshot`` action, otherwise ``None``.

        Raises:
            NotImplementedError: If the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIBackend")

        if isinstance(action, Click):
            self._click(action)
        elif isinstance(action, DoubleClick):
            self._doubleClick(action)
        elif isinstance(action, Move):
            self._move(action)
        elif isinstance(action, Scroll):
            self._scroll(action)
        elif isinstance(action, Drag):
            self._drag(action)
        elif isinstance(action, TypeText):
            self._type(action)
        elif isinstance(action, Hotkey):
            self._hotkey(action)
        elif isinstance(action, Screenshot):
            return self._screenshot()
        elif isinstance(action, Wait):
            # The duration is multiplied by 1e-3 before sleeping, i.e. it is
            # treated as milliseconds.
            time.sleep(action.duration * 1e-3)
        else:
            # This shouldn't happen due to supports() check, but be safe.
            raise NotImplementedError(f"Unhandled action: {action}")
        return None

    # ----- shared helpers ---------------------------------------------------
    @staticmethod
    def _button_name(button) -> str:
        """Translate an action button code into a pyautogui button name."""
        if button == 1:
            return "left"
        if button == 4:
            return "middle"
        if button == 2:
            return "right"
        return "primary"

    def _with_held_keys(self, keys, operation):
        """Run ``operation()`` while *keys* are held down.

        Every key that was successfully pressed is released afterwards,
        including when ``operation`` (or a later ``keyDown``) raises, so a
        failure cannot leave a modifier stuck.
        """
        pressed = []
        try:
            for key in keys or []:
                self.pag.keyDown(key)
                pressed.append(key)
                time.sleep(0.05)
            return operation()
        finally:
            for key in pressed:
                self.pag.keyUp(key)

    def _click_at(self, act, clicks: int) -> None:
        """Click *clicks* times at ``(act.x, act.y)`` honouring ``act.holdKey``."""
        button_str = self._button_name(act.button)
        self._with_held_keys(
            act.holdKey,
            lambda: self.pag.click(
                x=act.x,
                y=act.y,
                clicks=clicks,
                button=button_str,  # type: ignore
                duration=self.default_move_duration,
                interval=0.5,
            ),
        )

    # ----- individual helpers ------------------------------------------------
    def _click(self, act: Click) -> None:
        """Single click at the action's coordinates."""
        self._click_at(act, clicks=1)

    def _doubleClick(self, act: DoubleClick) -> None:
        """Double click at the action's coordinates."""
        self._click_at(act, clicks=2)

    def _move(self, act: Move) -> None:
        """Move the pointer to ``(act.x, act.y)`` while holding ``act.holdKey``."""
        self._with_held_keys(
            act.holdKey,
            lambda: self.pag.moveTo(x = act.x, y = act.y),
        )

    def _scroll(self, act: Scroll) -> None:
        """Move to ``(act.x, act.y)`` and scroll.

        Both axes are honoured: a vertical step no longer suppresses a
        non-zero horizontal step given on the same action.
        """
        self.pag.moveTo(x = act.x, y = act.y)
        if act.stepVertical is not None:
            self.pag.vscroll(act.stepVertical)
        if act.stepHorizontal:
            self.pag.hscroll(act.stepHorizontal)

    def _drag(self, act: Drag) -> None:
        """Drag with the left button from the start point to the end point."""

        def drag() -> None:
            self.pag.moveTo(x=act.startX, y=act.startY)
            time.sleep(0.1)

            self.pag.mouseDown(button='left')
            try:
                time.sleep(0.2)
                self.pag.moveTo(x=act.endX, y=act.endY, duration=0.5)
                time.sleep(0.1)
            finally:
                # Never leave the button pressed if the move fails.
                self.pag.mouseUp(button='left')

        self._with_held_keys(act.holdKey, drag)

    def _type(self, act: TypeText) -> None:
        """Enter ``act.text`` by pasting it from the clipboard.

        Pasting (rather than typing key by key) allows Chinese / any text.
        Note that this overwrites the current clipboard content.
        """
        pyperclip.copy(act.text)
        time.sleep(0.05)  # let clipboard stabilize

        if self.platform.startswith("darwin"):
            # On macOS the paste keystroke is sent through AppleScript
            # instead of pyautogui's hotkey.
            subprocess.run([
                "osascript", "-e",
                'tell application "System Events" to keystroke "v" using command down'
            ])
        else:  # Windows / Linux
            self.pag.hotkey("ctrl", "v", interval=0.05)

    def _hotkey(self, act: Hotkey) -> None:
        """Press the key combination in ``act.keys``.

        With ``act.duration`` set, keys are pressed one by one (sleeping
        ``duration`` milliseconds after each press) and then released in
        reverse order; otherwise ``pyautogui.hotkey`` is used. A missing
        ``keys`` value is treated as an empty combination.
        """
        keys = list(act.keys or [])
        if act.duration is not None:
            pressed = []
            try:
                for key in keys:
                    self.pag.keyDown(key)
                    pressed.append(key)
                    time.sleep(act.duration * 1e-3)
            finally:
                for key in reversed(pressed):
                    self.pag.keyUp(key)
        else:
            self.pag.hotkey(*keys, interval=0.1)

    def _screenshot(self):
        """Return the result of ``pyautogui.screenshot()``."""
        return self.pag.screenshot()
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# 1) VMware desktop automation backend (PyAutoGUI code executed via DesktopEnv)
|
|
3
|
+
# ---------------------------------------------------------------------------
|
|
4
|
+
import os
|
|
5
|
+
import io
|
|
6
|
+
from PIL import Image
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from desktop_env.desktop_env import DesktopEnv
|
|
9
|
+
from gui_agents.agents.Action import (
|
|
10
|
+
Action,
|
|
11
|
+
Click,
|
|
12
|
+
DoubleClick,
|
|
13
|
+
Move,
|
|
14
|
+
Scroll,
|
|
15
|
+
Drag,
|
|
16
|
+
TypeText,
|
|
17
|
+
Hotkey,
|
|
18
|
+
Wait,
|
|
19
|
+
Done,
|
|
20
|
+
Failed,
|
|
21
|
+
Screenshot
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from gui_agents.agents.Backend.Backend import Backend
|
|
25
|
+
import time
|
|
26
|
+
|
|
27
|
+
def screenshot_bytes_to_pil_image(screenshot_bytes: bytes) -> Optional[Image.Image]:
    """
    Convert the bytes data of obs["screenshot"] to a PIL Image object, preserving the original size

    Args:
        screenshot_bytes: The bytes data of the screenshot

    Returns:
        PIL Image object

    Raises:
        RuntimeError: If the bytes cannot be opened as an image. The original
            exception is attached as ``__cause__``.
    """
    try:
        # Create PIL Image object directly from bytes
        return Image.open(io.BytesIO(screenshot_bytes))
    except Exception as e:
        raise RuntimeError(f"Failed to convert screenshot bytes to PIL Image: {e}") from e
|
|
43
|
+
|
|
44
|
+
class PyAutoGUIVMwareBackend(Backend):
    """VMware desktop backend that expresses actions as *pyautogui* code.

    Every action is translated into a ``pyautogui`` source snippet (or one of
    the markers ``WAIT`` / ``DONE`` / ``FAIL``).

    * When the ``USE_PRECREATE_VM`` environment variable is set (``Ubuntu``
      or ``Windows``), a ``DesktopEnv`` for the matching VM is created and
      each snippet is executed through ``env.step``.
    * When it is unset, no VM is created and ``execute`` returns the snippet
      to the caller instead.
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Done, Failed, Screenshot}

    # Path components of the .vmx file for each supported USE_PRECREATE_VM value.
    _VM_PATHS = {
        "Ubuntu": ("vmware_vm_data", "Ubuntu-x86", "Ubuntu.vmx"),
        "Windows": ("vmware_vm_data", "Windows-x86", "Windows 10 x64.vmx"),
    }

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Create the backend and, if configured, start the VM environment.

        Args:
            default_move_duration: Value written as ``duration`` into the
                generated ``pyautogui.click`` calls.
            platform: Stored as ``self.platform``.

        Raises:
            ValueError: If ``USE_PRECREATE_VM`` is set to an unsupported value.
        """
        import pyautogui as pag  # local import to avoid hard requirement

        # PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        self.platform = platform
        self.use_precreate_vm = os.getenv("USE_PRECREATE_VM")
        # Always define the attribute so code can test it without AttributeError.
        self.env = None
        if self.use_precreate_vm is not None:
            vm_path_parts = self._VM_PATHS.get(self.use_precreate_vm)
            if vm_path_parts is None:
                raise ValueError(f"USE_PRECREATE_VM={self.use_precreate_vm} is not supported. Please use Ubuntu or Windows.")
            path_to_vm = os.path.join(*vm_path_parts)

            self.env = DesktopEnv(
                path_to_vm=path_to_vm,
                provider_name="vmware",
                os_type=self.use_precreate_vm,
                action_space="pyautogui",
                require_a11y_tree=False
            )
            self.env.reset()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> object | None:
        """Translate *action* and either return or run the generated code.

        Returns:
            * ``Screenshot``: whatever ``_screenshot`` returns.
            * Without ``USE_PRECREATE_VM``: the generated code string.
            * With ``USE_PRECREATE_VM``: ``None`` after the code was passed
              to ``env.step``.

        Raises:
            NotImplementedError: If the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIVMwareBackend")

        if isinstance(action, Click):
            action_pyautogui_code = self._click(action)
        elif isinstance(action, DoubleClick):
            action_pyautogui_code = self._doubleClick(action)
        elif isinstance(action, Move):
            action_pyautogui_code = self._move(action)
        elif isinstance(action, Scroll):
            action_pyautogui_code = self._scroll(action)
        elif isinstance(action, Drag):
            action_pyautogui_code = self._drag(action)
        elif isinstance(action, TypeText):
            action_pyautogui_code = self._type(action)
        elif isinstance(action, Hotkey):
            action_pyautogui_code = self._hotkey(action)
        elif isinstance(action, Screenshot):
            # Screenshots are never sent through env.step.
            return self._screenshot()
        elif isinstance(action, Wait):
            action_pyautogui_code = "WAIT"
        elif isinstance(action, Done):
            action_pyautogui_code = "DONE"
        elif isinstance(action, Failed):
            action_pyautogui_code = "FAIL"
        else:
            # This shouldn't happen due to supports() check, but be safe.
            raise NotImplementedError(f"Unhandled action: {action}")

        if self.use_precreate_vm is None:
            # No VM configured: hand the code to the caller.
            return action_pyautogui_code

        self.env.step(action_pyautogui_code)
        return None

    # ----- code-generation helpers -------------------------------------------
    @staticmethod
    def _quote(value) -> str:
        """Return *value* as a valid Python string literal.

        Using ``repr`` keeps simple values unchanged (``'ctrl'``) while
        escaping quotes, backslashes and newlines that would otherwise break
        or alter the generated code.
        """
        return repr(str(value))

    @staticmethod
    def _button_name(button) -> str:
        """Translate an action button code into a pyautogui button name."""
        if button == 1:
            return "left"
        if button == 4:
            return "middle"
        if button == 2:
            return "right"
        return "primary"

    def _wrap_with_held_keys(self, keys, body_parts) -> str:
        """Surround *body_parts* with keyDown/keyUp statements for *keys*."""
        keys = list(keys or [])
        code_parts = []
        for k in keys:
            code_parts.append(f"pyautogui.keyDown({self._quote(k)})")
            code_parts.append("time.sleep(0.05)")
        code_parts.extend(body_parts)
        for k in keys:
            code_parts.append(f"pyautogui.keyUp({self._quote(k)})")
        return "; ".join(code_parts)

    def _click_code(self, act, clicks: int) -> str:
        """Generate a ``pyautogui.click`` statement with *clicks* clicks."""
        button_str = self._button_name(act.button)
        click = (
            f"pyautogui.click(x={act.x}, y={act.y}, clicks={clicks}, "
            f"button='{button_str}', duration={self.default_move_duration}, interval=0.5)"
        )
        return self._wrap_with_held_keys(act.holdKey, [click])

    # ----- individual helpers ------------------------------------------------
    def _click(self, act: Click) -> str:
        """Generate code for a single click."""
        return self._click_code(act, clicks=1)

    def _doubleClick(self, act: DoubleClick) -> str:
        """Generate code for a double click."""
        return self._click_code(act, clicks=2)

    def _move(self, act: Move) -> str:
        """Generate code that moves the pointer to ``(act.x, act.y)``."""
        return self._wrap_with_held_keys(
            act.holdKey,
            [f"pyautogui.moveTo(x = {act.x}, y = {act.y})"],
        )

    def _scroll(self, act: Scroll) -> str:
        """Generate code that moves to ``(act.x, act.y)`` and scrolls.

        Both axes are honoured: a vertical step no longer suppresses a
        non-zero horizontal step given on the same action.
        """
        code_parts = [f"pyautogui.moveTo(x = {act.x}, y = {act.y})"]
        if act.stepVertical is not None:
            code_parts.append(f"pyautogui.vscroll({act.stepVertical})")
        if act.stepHorizontal:
            code_parts.append(f"pyautogui.hscroll({act.stepHorizontal})")
        return "; ".join(code_parts)

    def _drag(self, act: Drag) -> str:
        """Generate code for a left-button drag from start to end point."""
        drag_parts = [
            f"pyautogui.moveTo(x = {act.startX}, y = {act.startY})",
            "time.sleep(0.1)",
            "pyautogui.mouseDown(button='left')",
            "time.sleep(0.2)",
            f"pyautogui.moveTo(x = {act.endX}, y = {act.endY}, duration=0.5)",
            "time.sleep(0.1)",
            "pyautogui.mouseUp(button='left')",
        ]
        return self._wrap_with_held_keys(act.holdKey, drag_parts)

    def _type(self, act: TypeText) -> str:
        """Generate a ``pyautogui.write`` call for ``act.text``.

        The text is emitted as an escaped string literal so that quotes,
        backslashes or newlines in it cannot break the generated code.
        """
        return f"pyautogui.write({self._quote(act.text)})"

    def _hotkey(self, act: Hotkey) -> str:
        """Generate code pressing the combination in ``act.keys``.

        With ``act.duration`` set, keys are pressed one by one (sleeping
        ``duration`` milliseconds after each press) and released in reverse
        order; otherwise a single ``pyautogui.hotkey`` call is generated.
        A missing ``keys`` value is treated as an empty combination.
        """
        keys = list(act.keys or [])
        code_parts = []
        if act.duration is not None:
            for k in keys:
                code_parts.append(f"pyautogui.keyDown({self._quote(k)})")
                code_parts.append(f"time.sleep({act.duration} * 1e-3)")
            for k in reversed(keys):
                code_parts.append(f"pyautogui.keyUp({self._quote(k)})")
        else:
            # Build the argument list as a whole so an empty key list still
            # yields syntactically valid code.
            call_args = [self._quote(k) for k in keys] + ["interval=0.1"]
            code_parts.append(f"pyautogui.hotkey({', '.join(call_args)})")
        return "; ".join(code_parts)

    def _screenshot(self) -> object:
        """Return a screenshot or the code to take one.

        Without ``USE_PRECREATE_VM`` a code string is returned; with it, the
        screenshot bytes from ``env._get_obs()`` are converted to a PIL image.
        """
        if self.use_precreate_vm is None:
            return "screenshot = pyautogui.screenshot(); return screenshot"
        obs = self.env._get_obs()
        return screenshot_bytes_to_pil_image(obs["screenshot"])
|
|
File without changes
|
gui_agents/agents/agent_s.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
from math import log
|
|
4
3
|
import os
|
|
5
4
|
import platform
|
|
6
5
|
import textwrap
|
|
7
6
|
from typing import Dict, List, Optional, Tuple
|
|
8
7
|
|
|
9
|
-
from gui_agents.agents.grounding import ACI
|
|
10
8
|
from gui_agents.agents.worker import Worker
|
|
11
9
|
from gui_agents.agents.manager import Manager
|
|
12
10
|
from gui_agents.agents.grounding import Grounding, FastGrounding
|
gui_agents/agents/grounding.py
CHANGED
|
@@ -1,13 +1,8 @@
|
|
|
1
1
|
import ast
|
|
2
2
|
import re
|
|
3
3
|
import logging
|
|
4
|
-
from
|
|
5
|
-
from io import BytesIO
|
|
6
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
4
|
+
from typing import Dict, List
|
|
7
5
|
import time
|
|
8
|
-
import pytesseract
|
|
9
|
-
from PIL import Image
|
|
10
|
-
from pytesseract import Output
|
|
11
6
|
|
|
12
7
|
from gui_agents.tools.tools import Tools
|
|
13
8
|
from gui_agents.utils.common_utils import parse_single_code_from_string
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import pyautogui
|
|
4
3
|
from gui_agents.agents.Backend.Backend import Backend
|
|
5
4
|
from gui_agents.agents.Backend.ADBBackend import ADBBackend
|
|
6
5
|
from gui_agents.agents.Backend.LybicBackend import LybicBackend
|
|
7
|
-
|
|
8
|
-
from gui_agents.agents.Backend.
|
|
6
|
+
try:
|
|
7
|
+
from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
|
|
8
|
+
except ImportError:
|
|
9
|
+
PyAutoGUIBackend = None
|
|
10
|
+
pass
|
|
11
|
+
# from gui_agents.agents.Backend.PyAutoGUIVMwareBackend import PyAutoGUIVMwareBackend
|
|
9
12
|
"""hardware_interface.py ▸ Execute Action objects on real devices / emulators
|
|
10
13
|
===============================================================================
|
|
11
14
|
This module is the *single entry point* that upper‑layer planners / executors
|
|
@@ -16,8 +19,8 @@ use to perform UI operations. It is deliberately thin:
|
|
|
16
19
|
into platform‑specific calls (PyAutoGUI, ADB, Lybic cloud device, …).
|
|
17
20
|
* Performs minimal capability checks + error propagation.
|
|
18
21
|
|
|
19
|
-
The default backend implemented here is **PyAutoGUIBackend**.
|
|
20
|
-
**ADBBackend** and **
|
|
22
|
+
The default backend implemented here is **PyAutoGUIBackend**.
|
|
23
|
+
Available backends: **ADBBackend**, **LybicBackend**, and **PyAutoGUIVMwareBackend**.
|
|
21
24
|
|
|
22
25
|
--------------------------------------------------------------------------
|
|
23
26
|
Quick usage
|
|
@@ -27,6 +30,8 @@ from actions import Click
|
|
|
27
30
|
from hardware_interface import HardwareInterface
|
|
28
31
|
|
|
29
32
|
hwi = HardwareInterface(backend="pyautogui")
|
|
33
|
+
# Or use Lybic SDK backend
|
|
34
|
+
hwi_lybic = HardwareInterface(backend="lybic_sdk")
|
|
30
35
|
|
|
31
36
|
# Single action
|
|
32
37
|
hwi.dispatch(Click(xy=(960, 540)))
|
|
@@ -55,7 +60,7 @@ __all__ = [
|
|
|
55
60
|
"PyAutoGUIBackend",
|
|
56
61
|
"ADBBackend",
|
|
57
62
|
"LybicBackend",
|
|
58
|
-
|
|
63
|
+
# "PyAutoGUIVMwareBackend",
|
|
59
64
|
]
|
|
60
65
|
|
|
61
66
|
|
|
@@ -70,8 +75,9 @@ class HardwareInterface:
|
|
|
70
75
|
"pyautogui": PyAutoGUIBackend,
|
|
71
76
|
"adb": ADBBackend,
|
|
72
77
|
"lybic": LybicBackend,
|
|
73
|
-
"pyautogui_vmware": PyAutoGUIVMwareBackend,
|
|
74
78
|
}
|
|
79
|
+
if PyAutoGUIBackend is not None:
|
|
80
|
+
BACKEND_MAP["pyautogui_vmware"] = PyAutoGUIBackend
|
|
75
81
|
|
|
76
82
|
# ------------------------------------------------------------------
|
|
77
83
|
def __init__(self, backend: str | Backend = "pyautogui", **backend_kwargs):
|
|
@@ -81,6 +87,17 @@ class HardwareInterface:
|
|
|
81
87
|
key = backend.lower()
|
|
82
88
|
if key not in self.BACKEND_MAP:
|
|
83
89
|
raise ValueError(f"Unsupported backend '{backend}'. Available: {list(self.BACKEND_MAP)}")
|
|
90
|
+
|
|
91
|
+
# For GUI backends, provide helpful error message in headless environments
|
|
92
|
+
if key in ["pyautogui", "pyautogui_vmware"]:
|
|
93
|
+
import os
|
|
94
|
+
if os.name == 'posix' and not os.environ.get('DISPLAY'):
|
|
95
|
+
raise RuntimeError(
|
|
96
|
+
f"Cannot create '{backend}' backend: No DISPLAY environment variable found. "
|
|
97
|
+
f"This typically occurs in headless/containerized environments. "
|
|
98
|
+
f"Consider using 'lybic' or 'adb' backend instead."
|
|
99
|
+
)
|
|
100
|
+
|
|
84
101
|
self.backend = self.BACKEND_MAP[key](**backend_kwargs)
|
|
85
102
|
|
|
86
103
|
# ------------------------------------------------------------------
|
gui_agents/agents/manager.py
CHANGED
|
@@ -4,7 +4,6 @@ from collections import defaultdict
|
|
|
4
4
|
from typing import Dict, List, Optional, Tuple
|
|
5
5
|
import platform
|
|
6
6
|
|
|
7
|
-
from gui_agents.agents.grounding import ACI
|
|
8
7
|
from gui_agents.core.knowledge import KnowledgeBase
|
|
9
8
|
from gui_agents.agents.global_state import GlobalState
|
|
10
9
|
from gui_agents.store.registry import Registry
|
|
@@ -15,8 +14,6 @@ from gui_agents.utils.common_utils import (
|
|
|
15
14
|
agent_log_to_string,
|
|
16
15
|
)
|
|
17
16
|
from gui_agents.tools.tools import Tools
|
|
18
|
-
from PIL import Image
|
|
19
|
-
import io
|
|
20
17
|
|
|
21
18
|
logger = logging.getLogger("desktopenv.agent")
|
|
22
19
|
|
gui_agents/agents/translator.py
CHANGED
gui_agents/agents/worker.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
3
|
import textwrap
|
|
4
|
-
from typing import Dict, List
|
|
4
|
+
from typing import Dict, List
|
|
5
5
|
import platform
|
|
6
6
|
import os
|
|
7
7
|
import json
|
|
8
8
|
|
|
9
|
-
from gui_agents.agents.grounding import ACI
|
|
10
9
|
from gui_agents.core.knowledge import KnowledgeBase
|
|
11
10
|
from gui_agents.utils.common_utils import (
|
|
12
11
|
Node,
|