lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (38) hide show
  1. gui_agents/__init__.py +63 -0
  2. gui_agents/agents/Action.py +3 -3
  3. gui_agents/agents/Backend/ADBBackend.py +62 -0
  4. gui_agents/agents/Backend/Backend.py +28 -0
  5. gui_agents/agents/Backend/LybicBackend.py +354 -0
  6. gui_agents/agents/Backend/PyAutoGUIBackend.py +183 -0
  7. gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py +250 -0
  8. gui_agents/agents/Backend/__init__.py +0 -0
  9. gui_agents/agents/agent_s.py +0 -2
  10. gui_agents/agents/grounding.py +1 -6
  11. gui_agents/agents/hardware_interface.py +24 -7
  12. gui_agents/agents/manager.py +0 -3
  13. gui_agents/agents/translator.py +1 -1
  14. gui_agents/agents/worker.py +1 -2
  15. gui_agents/cli_app.py +143 -8
  16. gui_agents/core/engine.py +0 -2
  17. gui_agents/core/knowledge.py +0 -2
  18. gui_agents/lybic_client/__init__.py +0 -0
  19. gui_agents/lybic_client/lybic_client.py +88 -0
  20. gui_agents/prompts/__init__.py +0 -0
  21. gui_agents/prompts/prompts.py +869 -0
  22. gui_agents/service/__init__.py +19 -0
  23. gui_agents/service/agent_service.py +527 -0
  24. gui_agents/service/api_models.py +136 -0
  25. gui_agents/service/config.py +241 -0
  26. gui_agents/service/exceptions.py +35 -0
  27. gui_agents/store/__init__.py +0 -0
  28. gui_agents/store/registry.py +22 -0
  29. gui_agents/tools/tools.py +0 -4
  30. gui_agents/unit_test/test_manager.py +0 -2
  31. gui_agents/unit_test/test_worker.py +0 -2
  32. gui_agents/utils/analyze_display.py +1 -1
  33. gui_agents/utils/common_utils.py +0 -2
  34. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/METADATA +203 -75
  35. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/RECORD +38 -21
  36. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/WHEEL +0 -0
  37. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/licenses/LICENSE +0 -0
  38. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,183 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 1) Desktop automation backend (PyAutoGUI)
3
+ # ---------------------------------------------------------------------------
4
+ import subprocess
5
+ import sys
6
+ import pyperclip
7
+ from gui_agents.agents.Action import (
8
+ Action,
9
+ Click,
10
+ DoubleClick,
11
+ Move,
12
+ Scroll,
13
+ Drag,
14
+ TypeText,
15
+ Hotkey,
16
+ Wait,
17
+ Screenshot
18
+ )
19
+
20
+ from gui_agents.agents.Backend.Backend import Backend
21
+ import time
22
+
23
+
24
class PyAutoGUIBackend(Backend):
    """Pure local desktop backend powered by *pyautogui*.

    Pros : zero dependency besides Python & pyautogui.
    Cons : Requires an active, visible desktop session (won't work headless).
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Screenshot}

    # Numeric ``button`` codes carried by Click/DoubleClick mapped to the
    # pyautogui button names; any other value falls back to 'primary'.
    _BUTTON_NAMES = {1: "left", 2: "right", 4: "middle"}

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Bind the backend to the local pyautogui module.

        Args:
            default_move_duration: ``duration`` passed to ``pyautogui.click``.
            platform: Platform identifier; defaults to ``sys.platform``.
                Stored lower-cased and used to pick the paste shortcut.
        """
        import pyautogui as pag  # local import to avoid hard requirement

        # PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        # Critical patch: save platform identifier
        self.platform = (platform or sys.platform).lower()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> None:
        """Run a single action on the local desktop.

        Args:
            action: One of the action types listed in ``_supported``.

        Returns:
            The object returned by ``pyautogui.screenshot()`` for a
            ``Screenshot`` action (despite the ``None`` annotation);
            ``None`` for every other action.

        Raises:
            NotImplementedError: If the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIBackend")

        if isinstance(action, Click):
            self._click(action)
        elif isinstance(action, DoubleClick):
            self._doubleClick(action)
        elif isinstance(action, Move):
            self._move(action)
        elif isinstance(action, Scroll):
            self._scroll(action)
        elif isinstance(action, Drag):
            self._drag(action)
        elif isinstance(action, TypeText):
            self._type(action)
        elif isinstance(action, Hotkey):
            self._hotkey(action)
        elif isinstance(action, Screenshot):
            screenshot = self._screenshot()
            return screenshot  # type: ignore
        elif isinstance(action, Wait):
            # The duration is scaled by 1e-3, i.e. treated as milliseconds.
            time.sleep(action.duration * 1e-3)
        else:
            # This shouldn't happen due to supports() check, but be safe.
            raise NotImplementedError(f"Unhandled action: {action}")

    # ----- individual helpers ------------------------------------------------
    def _press_keys(self, keys) -> None:
        """Press every key in *keys*, pausing 50 ms after each press.

        NOTE(review): the per-key pause assumes the sleep sat inside the
        original loop body -- confirm against the upstream file.
        """
        for k in keys:
            self.pag.keyDown(k)
            time.sleep(0.05)

    def _release_keys(self, keys) -> None:
        """Release every key in *keys*."""
        for k in keys:
            self.pag.keyUp(k)

    def _click_at(self, act, clicks: int) -> None:
        """Click *clicks* times at ``(act.x, act.y)`` with ``act.holdKey`` held."""
        hold_keys = act.holdKey or []
        button_str = self._BUTTON_NAMES.get(act.button, 'primary')
        try:
            self._press_keys(hold_keys)
            self.pag.click(
                x=act.x,
                y=act.y,
                clicks=clicks,
                button=button_str,  # type: ignore
                duration=self.default_move_duration,
                interval=0.5,
            )
        finally:
            # Always release the modifiers so a failed click cannot leave
            # them stuck down on the user's desktop.
            self._release_keys(hold_keys)

    def _click(self, act: Click) -> None:
        """Single click at the action's coordinates."""
        self._click_at(act, clicks=1)

    def _doubleClick(self, act: DoubleClick) -> None:
        """Double click at the action's coordinates."""
        self._click_at(act, clicks=2)

    def _move(self, act: Move) -> None:
        """Move the pointer to ``(act.x, act.y)`` with ``act.holdKey`` held."""
        hold_keys = act.holdKey or []
        try:
            self._press_keys(hold_keys)
            self.pag.moveTo(x=act.x, y=act.y)
        finally:
            self._release_keys(hold_keys)

    def _scroll(self, act: Scroll) -> None:
        """Move to ``(act.x, act.y)`` and scroll.

        A vertical step takes precedence: the horizontal step is applied
        only when ``stepVertical`` is ``None``.
        """
        self.pag.moveTo(x=act.x, y=act.y)
        if act.stepVertical is not None:
            self.pag.vscroll(act.stepVertical)
        elif act.stepHorizontal is not None:
            self.pag.hscroll(act.stepHorizontal)

    def _drag(self, act: Drag) -> None:
        """Drag with the left button from the start point to the end point."""
        hold_keys = act.holdKey or []
        try:
            self._press_keys(hold_keys)

            self.pag.moveTo(x=act.startX, y=act.startY)
            time.sleep(0.1)

            self.pag.mouseDown(button='left')
            try:
                time.sleep(0.2)
                self.pag.moveTo(x=act.endX, y=act.endY, duration=0.5)
                time.sleep(0.1)
            finally:
                # Never leave the mouse button held if the move fails.
                self.pag.mouseUp(button='left')
        finally:
            self._release_keys(hold_keys)

    def _type(self, act: TypeText) -> None:
        """Enter ``act.text`` by copying it to the clipboard and pasting it."""
        # ------- Paste Chinese / any text --------------------------------
        pyperclip.copy(act.text)
        time.sleep(0.05)  # let clipboard stabilize

        if self.platform.startswith("darwin"):
            # On macOS the paste keystroke is sent through AppleScript
            # (System Events) rather than pyautogui's hotkey.
            subprocess.run([
                "osascript", "-e",
                'tell application "System Events" to keystroke "v" using command down'
            ])
        else:  # Windows / Linux
            self.pag.hotkey("ctrl", "v", interval=0.05)

    def _hotkey(self, act: Hotkey) -> None:
        """Press the key combination in ``act.keys``.

        With a ``duration`` the keys are pressed one by one, sleeping
        ``duration`` milliseconds after each press, then released in
        reverse order. Without one, ``pyautogui.hotkey`` is used.
        """
        # Guard once so a missing key list is handled the same way in
        # every branch (previously only the key-down loop was guarded).
        keys = act.keys or []
        if act.duration is not None:
            try:
                for k in keys:
                    self.pag.keyDown(k)
                    time.sleep(act.duration * 1e-3)
            finally:
                for k in reversed(keys):
                    self.pag.keyUp(k)
        else:
            self.pag.hotkey(*keys, interval=0.1)

    def _screenshot(self):
        """Return the object produced by ``pyautogui.screenshot()``."""
        return self.pag.screenshot()
@@ -0,0 +1,250 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 1) Desktop automation backend (PyAutoGUI)
3
+ # ---------------------------------------------------------------------------
4
+ import os
5
+ import io
6
+ from PIL import Image
7
+ from typing import Optional
8
+ from desktop_env.desktop_env import DesktopEnv
9
+ from gui_agents.agents.Action import (
10
+ Action,
11
+ Click,
12
+ DoubleClick,
13
+ Move,
14
+ Scroll,
15
+ Drag,
16
+ TypeText,
17
+ Hotkey,
18
+ Wait,
19
+ Done,
20
+ Failed,
21
+ Screenshot
22
+ )
23
+
24
+ from gui_agents.agents.Backend.Backend import Backend
25
+ import time
26
+
27
def screenshot_bytes_to_pil_image(screenshot_bytes: bytes) -> Optional[Image.Image]:
    """
    Convert the bytes data of obs["screenshot"] to a PIL Image object, preserving the original size

    Args:
        screenshot_bytes: The bytes data of the screenshot

    Returns:
        PIL Image object

    Raises:
        RuntimeError: If the bytes cannot be opened as an image. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Create PIL Image object directly from bytes
        return Image.open(io.BytesIO(screenshot_bytes))
    except Exception as e:
        # Chain the original error so the real cause stays in the traceback.
        raise RuntimeError(f"Failed to convert screenshot bytes to PIL Image: {e}") from e
43
+
44
class PyAutoGUIVMwareBackend(Backend):
    """VMware desktop backend that expresses actions as *pyautogui* code.

    Every action is translated into a string of pyautogui statements.
    When the ``USE_PRECREATE_VM`` environment variable is set, the string
    is handed to a ``DesktopEnv`` (provider ``vmware``) via ``env.step``;
    otherwise ``execute`` returns the string to the caller.
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Done, Failed, Screenshot}

    # Numeric ``button`` codes carried by Click/DoubleClick mapped to the
    # pyautogui button names; any other value falls back to 'primary'.
    _BUTTON_NAMES = {1: "left", 2: "right", 4: "middle"}

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Set up the backend and, if requested, the pre-created VM.

        Args:
            default_move_duration: ``duration`` written into generated
                ``pyautogui.click`` calls.
            platform: Stored as-is on the instance.

        Raises:
            ValueError: If ``USE_PRECREATE_VM`` is set to anything other
                than ``Ubuntu`` or ``Windows``.
        """
        import pyautogui as pag  # local import to avoid hard requirement

        # PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        self.platform = platform
        # Defined up-front so the attribute exists even without a VM.
        self.env = None
        self.use_precreate_vm = os.getenv("USE_PRECREATE_VM")
        if self.use_precreate_vm is not None:
            if self.use_precreate_vm == "Ubuntu":
                path_to_vm = os.path.join("vmware_vm_data", "Ubuntu-x86", "Ubuntu.vmx")
            elif self.use_precreate_vm == "Windows":
                path_to_vm = os.path.join("vmware_vm_data", "Windows-x86", "Windows 10 x64.vmx")
            else:
                raise ValueError(f"USE_PRECREATE_VM={self.use_precreate_vm} is not supported. Please use Ubuntu or Windows.")

            self.env = DesktopEnv(
                path_to_vm=path_to_vm,
                provider_name="vmware",
                os_type=self.use_precreate_vm,
                action_space="pyautogui",
                require_a11y_tree=False
            )
            self.env.reset()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> str | None:
        """Translate *action* and either return or run the generated code.

        Args:
            action: One of the action types listed in ``_supported``.

        Returns:
            * ``Screenshot``: whatever ``_screenshot()`` returns.
            * ``USE_PRECREATE_VM`` unset: the generated code string
              (``"WAIT"``, ``"DONE"`` or ``"FAIL"`` for the control actions).
            * ``USE_PRECREATE_VM`` set: ``None`` after ``env.step``.

        Raises:
            NotImplementedError: If the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIVMwareBackend")

        if isinstance(action, Screenshot):
            screenshot = self._screenshot()
            return screenshot  # type: ignore

        action_pyautogui_code = self._to_pyautogui_code(action)

        # For automation OSWorld evaluation
        if self.use_precreate_vm is None:
            return action_pyautogui_code

        # For cli_app
        self.env.step(action_pyautogui_code)
        return None

    def _to_pyautogui_code(self, action: Action) -> str:
        """Return the code string (or control token) for a non-screenshot action."""
        if isinstance(action, Click):
            return self._click(action)
        if isinstance(action, DoubleClick):
            return self._doubleClick(action)
        if isinstance(action, Move):
            return self._move(action)
        if isinstance(action, Scroll):
            return self._scroll(action)
        if isinstance(action, Drag):
            return self._drag(action)
        if isinstance(action, TypeText):
            return self._type(action)
        if isinstance(action, Hotkey):
            return self._hotkey(action)
        if isinstance(action, Wait):
            return "WAIT"
        if isinstance(action, Done):
            return "DONE"
        if isinstance(action, Failed):
            return "FAIL"
        # This shouldn't happen due to supports() check, but be safe.
        raise NotImplementedError(f"Unhandled action: {action}")

    # ----- individual helpers ------------------------------------------------
    @staticmethod
    def _quote(value) -> str:
        """Return *value* as a Python string literal for generated code.

        ``repr`` escapes quotes, backslashes and newlines, so arbitrary
        text cannot break out of the literal. Plain values render exactly
        as the former ``'{value}'`` interpolation did.
        """
        return repr(str(value))

    def _key_down_parts(self, keys) -> list:
        """Statements pressing each key, with a 50 ms pause after each.

        NOTE(review): the per-key pause assumes the sleep sat inside the
        original loop body -- confirm against the upstream file.
        """
        parts = []
        for k in keys:
            parts.append(f"pyautogui.keyDown({self._quote(k)})")
            parts.append("time.sleep(0.05)")
        return parts

    def _key_up_parts(self, keys) -> list:
        """Statements releasing each key."""
        return [f"pyautogui.keyUp({self._quote(k)})" for k in keys]

    def _click_code(self, act, clicks: int) -> str:
        """Code for clicking *clicks* times with ``act.holdKey`` held."""
        button_str = self._BUTTON_NAMES.get(act.button, 'primary')
        hold_keys = act.holdKey or []
        code_parts = self._key_down_parts(hold_keys)
        code_parts.append(f"pyautogui.click(x={act.x}, y={act.y}, clicks={clicks}, button='{button_str}', duration={self.default_move_duration}, interval=0.5)")
        code_parts.extend(self._key_up_parts(hold_keys))
        return "; ".join(code_parts)

    def _click(self, act: Click) -> str:
        """Code for a single click at the action's coordinates."""
        return self._click_code(act, clicks=1)

    def _doubleClick(self, act: DoubleClick) -> str:
        """Code for a double click at the action's coordinates."""
        return self._click_code(act, clicks=2)

    def _move(self, act: Move) -> str:
        """Code for moving the pointer with ``act.holdKey`` held."""
        hold_keys = act.holdKey or []
        code_parts = self._key_down_parts(hold_keys)
        code_parts.append(f"pyautogui.moveTo(x = {act.x}, y = {act.y})")
        code_parts.extend(self._key_up_parts(hold_keys))
        return "; ".join(code_parts)

    def _scroll(self, act: Scroll) -> str:
        """Code for moving to the target and scrolling.

        A vertical step takes precedence: the horizontal step is emitted
        only when ``stepVertical`` is ``None``.
        """
        code_parts = [f"pyautogui.moveTo(x = {act.x}, y = {act.y})"]
        if act.stepVertical is not None:
            code_parts.append(f"pyautogui.vscroll({act.stepVertical})")
        elif act.stepHorizontal is not None:
            code_parts.append(f"pyautogui.hscroll({act.stepHorizontal})")
        return "; ".join(code_parts)

    def _drag(self, act: Drag) -> str:
        """Code for a left-button drag from the start to the end point."""
        hold_keys = act.holdKey or []
        code_parts = self._key_down_parts(hold_keys)

        code_parts.append(f"pyautogui.moveTo(x = {act.startX}, y = {act.startY})")
        code_parts.append("time.sleep(0.1)")

        code_parts.append("pyautogui.mouseDown(button='left')")
        code_parts.append("time.sleep(0.2)")

        code_parts.append(f"pyautogui.moveTo(x = {act.endX}, y = {act.endY}, duration=0.5)")
        code_parts.append("time.sleep(0.1)")

        code_parts.append("pyautogui.mouseUp(button='left')")

        code_parts.extend(self._key_up_parts(hold_keys))
        return "; ".join(code_parts)

    def _type(self, act: TypeText) -> str:
        """Code for typing ``act.text`` with ``pyautogui.write``."""
        # The text is emitted as an escaped literal: a raw '{text}'
        # interpolation produced invalid (or injected) code whenever the
        # text contained a quote, backslash or newline.
        return f"pyautogui.write({self._quote(act.text)})"

    def _hotkey(self, act: Hotkey) -> str:
        """Code for pressing the key combination in ``act.keys``.

        With a ``duration`` the keys are pressed one by one, sleeping
        ``duration`` milliseconds after each press, then released in
        reverse order. Without one, a single ``pyautogui.hotkey`` call is
        generated.
        """
        # Guard once so a missing key list is handled the same way in
        # every branch (previously only the key-down loop was guarded).
        keys = act.keys or []
        code_parts = []
        if act.duration is not None:
            for k in keys:
                code_parts.append(f"pyautogui.keyDown({self._quote(k)})")
                code_parts.append(f"time.sleep({act.duration} * 1e-3)")
            for k in reversed(keys):
                code_parts.append(f"pyautogui.keyUp({self._quote(k)})")
        else:
            call_args = [self._quote(k) for k in keys]
            call_args.append("interval=0.1")
            code_parts.append(f"pyautogui.hotkey({', '.join(call_args)})")
        return "; ".join(code_parts)

    def _screenshot(self) -> str:
        """Return a screenshot, or the code to take one.

        Without a VM this returns a code string; with a VM it returns the
        PIL image decoded from the environment's current observation
        (despite the ``str`` annotation).
        """
        if self.use_precreate_vm is None:
            return "screenshot = pyautogui.screenshot(); return screenshot"
        obs = self.env._get_obs()
        return screenshot_bytes_to_pil_image(obs["screenshot"])
File without changes
@@ -1,12 +1,10 @@
1
1
  import json
2
2
  import logging
3
- from math import log
4
3
  import os
5
4
  import platform
6
5
  import textwrap
7
6
  from typing import Dict, List, Optional, Tuple
8
7
 
9
- from gui_agents.agents.grounding import ACI
10
8
  from gui_agents.agents.worker import Worker
11
9
  from gui_agents.agents.manager import Manager
12
10
  from gui_agents.agents.grounding import Grounding, FastGrounding
@@ -1,13 +1,8 @@
1
1
  import ast
2
2
  import re
3
3
  import logging
4
- from collections import defaultdict
5
- from io import BytesIO
6
- from typing import Any, Dict, List, Optional, Tuple, Union
4
+ from typing import Dict, List
7
5
  import time
8
- import pytesseract
9
- from PIL import Image
10
- from pytesseract import Output
11
6
 
12
7
  from gui_agents.tools.tools import Tools
13
8
  from gui_agents.utils.common_utils import parse_single_code_from_string
@@ -1,11 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
- import pyautogui
4
3
  from gui_agents.agents.Backend.Backend import Backend
5
4
  from gui_agents.agents.Backend.ADBBackend import ADBBackend
6
5
  from gui_agents.agents.Backend.LybicBackend import LybicBackend
7
- from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
8
- from gui_agents.agents.Backend.PyAutoGUIVMwareBackend import PyAutoGUIVMwareBackend
6
+ try:
7
+ from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
8
+ except ImportError:
9
+ PyAutoGUIBackend = None
10
+ pass
11
+ # from gui_agents.agents.Backend.PyAutoGUIVMwareBackend import PyAutoGUIVMwareBackend
9
12
  """hardware_interface.py ▸ Execute Action objects on real devices / emulators
10
13
  ===============================================================================
11
14
  This module is the *single entry point* that upper‑layer planners / executors
@@ -16,8 +19,8 @@ use to perform UI operations. It is deliberately thin:
16
19
  into platform‑specific calls (PyAutoGUI, ADB, Lybic cloud device, …).
17
20
  * Performs minimal capability checks + error propagation.
18
21
 
19
- The default backend implemented here is **PyAutoGUIBackend**. Stubs for
20
- **ADBBackend** and **LybicBackend** show how to extend the system.
22
+ The default backend implemented here is **PyAutoGUIBackend**.
23
+ Available backends: **ADBBackend**, **LybicBackend**, and **PyAutoGUIVMwareBackend**.
21
24
 
22
25
  --------------------------------------------------------------------------
23
26
  Quick usage
@@ -27,6 +30,8 @@ from actions import Click
27
30
  from hardware_interface import HardwareInterface
28
31
 
29
32
  hwi = HardwareInterface(backend="pyautogui")
33
+ # Or use Lybic SDK backend
34
+ hwi_lybic = HardwareInterface(backend="lybic_sdk")
30
35
 
31
36
  # Single action
32
37
  hwi.dispatch(Click(xy=(960, 540)))
@@ -55,7 +60,7 @@ __all__ = [
55
60
  "PyAutoGUIBackend",
56
61
  "ADBBackend",
57
62
  "LybicBackend",
58
- "PyAutoGUIVMwareBackend",
63
+ # "PyAutoGUIVMwareBackend",
59
64
  ]
60
65
 
61
66
 
@@ -70,8 +75,9 @@ class HardwareInterface:
70
75
  "pyautogui": PyAutoGUIBackend,
71
76
  "adb": ADBBackend,
72
77
  "lybic": LybicBackend,
73
- "pyautogui_vmware": PyAutoGUIVMwareBackend,
74
78
  }
79
+ if PyAutoGUIBackend is not None:
80
+ BACKEND_MAP["pyautogui_vmware"] = PyAutoGUIBackend
75
81
 
76
82
  # ------------------------------------------------------------------
77
83
  def __init__(self, backend: str | Backend = "pyautogui", **backend_kwargs):
@@ -81,6 +87,17 @@ class HardwareInterface:
81
87
  key = backend.lower()
82
88
  if key not in self.BACKEND_MAP:
83
89
  raise ValueError(f"Unsupported backend '{backend}'. Available: {list(self.BACKEND_MAP)}")
90
+
91
+ # For GUI backends, provide helpful error message in headless environments
92
+ if key in ["pyautogui", "pyautogui_vmware"]:
93
+ import os
94
+ if os.name == 'posix' and not os.environ.get('DISPLAY'):
95
+ raise RuntimeError(
96
+ f"Cannot create '{backend}' backend: No DISPLAY environment variable found. "
97
+ f"This typically occurs in headless/containerized environments. "
98
+ f"Consider using 'lybic' or 'adb' backend instead."
99
+ )
100
+
84
101
  self.backend = self.BACKEND_MAP[key](**backend_kwargs)
85
102
 
86
103
  # ------------------------------------------------------------------
@@ -4,7 +4,6 @@ from collections import defaultdict
4
4
  from typing import Dict, List, Optional, Tuple
5
5
  import platform
6
6
 
7
- from gui_agents.agents.grounding import ACI
8
7
  from gui_agents.core.knowledge import KnowledgeBase
9
8
  from gui_agents.agents.global_state import GlobalState
10
9
  from gui_agents.store.registry import Registry
@@ -15,8 +14,6 @@ from gui_agents.utils.common_utils import (
15
14
  agent_log_to_string,
16
15
  )
17
16
  from gui_agents.tools.tools import Tools
18
- from PIL import Image
19
- import io
20
17
 
21
18
  logger = logging.getLogger("desktopenv.agent")
22
19
 
@@ -5,7 +5,7 @@ format strictly follows computer-use schema.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
- import ast, json
8
+ import ast
9
9
  from typing import List, Dict
10
10
 
11
11
 
@@ -1,12 +1,11 @@
1
1
  import logging
2
2
  import re
3
3
  import textwrap
4
- from typing import Dict, List, Tuple
4
+ from typing import Dict, List
5
5
  import platform
6
6
  import os
7
7
  import json
8
8
 
9
- from gui_agents.agents.grounding import ACI
10
9
  from gui_agents.core.knowledge import KnowledgeBase
11
10
  from gui_agents.utils.common_utils import (
12
11
  Node,