lybic-guiagents 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- desktop_env/__init__.py +1 -0
- desktop_env/actions.py +203 -0
- desktop_env/controllers/__init__.py +0 -0
- desktop_env/controllers/python.py +471 -0
- desktop_env/controllers/setup.py +882 -0
- desktop_env/desktop_env.py +509 -0
- desktop_env/evaluators/__init__.py +5 -0
- desktop_env/evaluators/getters/__init__.py +41 -0
- desktop_env/evaluators/getters/calc.py +15 -0
- desktop_env/evaluators/getters/chrome.py +1774 -0
- desktop_env/evaluators/getters/file.py +154 -0
- desktop_env/evaluators/getters/general.py +42 -0
- desktop_env/evaluators/getters/gimp.py +38 -0
- desktop_env/evaluators/getters/impress.py +126 -0
- desktop_env/evaluators/getters/info.py +24 -0
- desktop_env/evaluators/getters/misc.py +406 -0
- desktop_env/evaluators/getters/replay.py +20 -0
- desktop_env/evaluators/getters/vlc.py +86 -0
- desktop_env/evaluators/getters/vscode.py +35 -0
- desktop_env/evaluators/metrics/__init__.py +160 -0
- desktop_env/evaluators/metrics/basic_os.py +68 -0
- desktop_env/evaluators/metrics/chrome.py +493 -0
- desktop_env/evaluators/metrics/docs.py +1011 -0
- desktop_env/evaluators/metrics/general.py +665 -0
- desktop_env/evaluators/metrics/gimp.py +637 -0
- desktop_env/evaluators/metrics/libreoffice.py +28 -0
- desktop_env/evaluators/metrics/others.py +92 -0
- desktop_env/evaluators/metrics/pdf.py +31 -0
- desktop_env/evaluators/metrics/slides.py +957 -0
- desktop_env/evaluators/metrics/table.py +585 -0
- desktop_env/evaluators/metrics/thunderbird.py +176 -0
- desktop_env/evaluators/metrics/utils.py +719 -0
- desktop_env/evaluators/metrics/vlc.py +524 -0
- desktop_env/evaluators/metrics/vscode.py +283 -0
- desktop_env/providers/__init__.py +35 -0
- desktop_env/providers/aws/__init__.py +0 -0
- desktop_env/providers/aws/manager.py +278 -0
- desktop_env/providers/aws/provider.py +186 -0
- desktop_env/providers/aws/provider_with_proxy.py +315 -0
- desktop_env/providers/aws/proxy_pool.py +193 -0
- desktop_env/providers/azure/__init__.py +0 -0
- desktop_env/providers/azure/manager.py +87 -0
- desktop_env/providers/azure/provider.py +207 -0
- desktop_env/providers/base.py +97 -0
- desktop_env/providers/gcp/__init__.py +0 -0
- desktop_env/providers/gcp/manager.py +0 -0
- desktop_env/providers/gcp/provider.py +0 -0
- desktop_env/providers/virtualbox/__init__.py +0 -0
- desktop_env/providers/virtualbox/manager.py +463 -0
- desktop_env/providers/virtualbox/provider.py +124 -0
- desktop_env/providers/vmware/__init__.py +0 -0
- desktop_env/providers/vmware/manager.py +455 -0
- desktop_env/providers/vmware/provider.py +105 -0
- gui_agents/__init__.py +0 -0
- gui_agents/agents/Action.py +209 -0
- gui_agents/agents/__init__.py +0 -0
- gui_agents/agents/agent_s.py +832 -0
- gui_agents/agents/global_state.py +610 -0
- gui_agents/agents/grounding.py +651 -0
- gui_agents/agents/hardware_interface.py +129 -0
- gui_agents/agents/manager.py +568 -0
- gui_agents/agents/translator.py +132 -0
- gui_agents/agents/worker.py +355 -0
- gui_agents/cli_app.py +560 -0
- gui_agents/core/__init__.py +0 -0
- gui_agents/core/engine.py +1496 -0
- gui_agents/core/knowledge.py +449 -0
- gui_agents/core/mllm.py +555 -0
- gui_agents/tools/__init__.py +0 -0
- gui_agents/tools/tools.py +727 -0
- gui_agents/unit_test/__init__.py +0 -0
- gui_agents/unit_test/run_tests.py +65 -0
- gui_agents/unit_test/test_manager.py +330 -0
- gui_agents/unit_test/test_worker.py +269 -0
- gui_agents/utils/__init__.py +0 -0
- gui_agents/utils/analyze_display.py +301 -0
- gui_agents/utils/common_utils.py +263 -0
- gui_agents/utils/display_viewer.py +281 -0
- gui_agents/utils/embedding_manager.py +53 -0
- gui_agents/utils/image_axis_utils.py +27 -0
- lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
- lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
- lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
- lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
- lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
import re
|
|
7
|
+
from typing import Callable, Any, Optional, Tuple
|
|
8
|
+
from typing import List, Dict, Union
|
|
9
|
+
|
|
10
|
+
import gymnasium as gym
|
|
11
|
+
|
|
12
|
+
from desktop_env.controllers.python import PythonController
|
|
13
|
+
from desktop_env.controllers.setup import SetupController
|
|
14
|
+
from desktop_env.evaluators import metrics, getters
|
|
15
|
+
from desktop_env.providers import create_vm_manager_and_provider
|
|
16
|
+
|
|
17
|
+
# Module-level logger used by every DesktopEnv method in this file.
logger = logging.getLogger("desktopenv.env")

# Type aliases for the evaluator callables resolved in DesktopEnv._set_evaluator_info:
# a Metric scores a result state (optionally against an expected state),
# a Getter is called with the environment and a getter config dict.
Metric = Callable[[Any, Any], float]
Getter = Callable[[gym.Env, Dict[str, Any]], Any]

MAX_RETRIES = 5 # Maximum retries for environment setup
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _fix_pyautogui_less_than_bug(command: str) -> str:
|
|
27
|
+
"""
|
|
28
|
+
Fix PyAutoGUI '<' character bug by converting it to hotkey("shift", ',') calls.
|
|
29
|
+
|
|
30
|
+
This fixes the known PyAutoGUI issue where typing '<' produces '>' instead.
|
|
31
|
+
References:
|
|
32
|
+
- https://github.com/asweigart/pyautogui/issues/198
|
|
33
|
+
- https://github.com/xlang-ai/OSWorld/issues/257
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
command (str): The original pyautogui command
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
str: The fixed command with '<' characters handled properly
|
|
40
|
+
"""
|
|
41
|
+
# Handle typewrite with '<' characters
|
|
42
|
+
def replace_typewrite_less_than(match):
|
|
43
|
+
content = match.group(1)
|
|
44
|
+
# Split the content by '<' and rebuild with hotkey calls
|
|
45
|
+
parts = content.split('<')
|
|
46
|
+
if len(parts) == 1:
|
|
47
|
+
# No '<' found, return original
|
|
48
|
+
return match.group(0)
|
|
49
|
+
|
|
50
|
+
# Rebuild the command
|
|
51
|
+
result_parts = []
|
|
52
|
+
for i, part in enumerate(parts):
|
|
53
|
+
if i == 0:
|
|
54
|
+
# First part, just add typewrite if not empty
|
|
55
|
+
if part:
|
|
56
|
+
result_parts.append(f"pyautogui.typewrite({repr(part)})")
|
|
57
|
+
else:
|
|
58
|
+
# Add hotkey for '<' and then typewrite for the rest if not empty
|
|
59
|
+
result_parts.append('pyautogui.hotkey("shift", ",")')
|
|
60
|
+
if part:
|
|
61
|
+
result_parts.append(f"pyautogui.typewrite({repr(part)})")
|
|
62
|
+
|
|
63
|
+
return '; '.join(result_parts)
|
|
64
|
+
|
|
65
|
+
# Handle press('<') calls
|
|
66
|
+
def replace_press_less_than(match):
|
|
67
|
+
return 'pyautogui.hotkey("shift", ",")'
|
|
68
|
+
|
|
69
|
+
# Pattern to match typewrite calls with quoted strings
|
|
70
|
+
typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'
|
|
71
|
+
# Pattern to match press('<') calls
|
|
72
|
+
press_pattern = r'pyautogui\.press\(["\']<["\']\)'
|
|
73
|
+
|
|
74
|
+
# First handle press('<') calls
|
|
75
|
+
command = re.sub(press_pattern, replace_press_less_than, command)
|
|
76
|
+
|
|
77
|
+
# Then handle typewrite calls
|
|
78
|
+
def process_typewrite_match(match):
|
|
79
|
+
quote_char = match.group(1)
|
|
80
|
+
content = match.group(2)
|
|
81
|
+
|
|
82
|
+
# Check if content contains '<'
|
|
83
|
+
if '<' not in content:
|
|
84
|
+
return match.group(0)
|
|
85
|
+
|
|
86
|
+
# Split by '<' and rebuild
|
|
87
|
+
parts = content.split('<')
|
|
88
|
+
result_parts = []
|
|
89
|
+
|
|
90
|
+
for i, part in enumerate(parts):
|
|
91
|
+
if i == 0:
|
|
92
|
+
# First part
|
|
93
|
+
if part:
|
|
94
|
+
result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
|
|
95
|
+
else:
|
|
96
|
+
# Add hotkey for '<' and then typewrite for the rest
|
|
97
|
+
result_parts.append('pyautogui.hotkey("shift", ",")')
|
|
98
|
+
if part:
|
|
99
|
+
result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
|
|
100
|
+
|
|
101
|
+
return '; '.join(result_parts)
|
|
102
|
+
|
|
103
|
+
command = re.sub(typewrite_pattern, process_typewrite_match, command)
|
|
104
|
+
|
|
105
|
+
return command
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class DesktopEnv(gym.Env):
|
|
109
|
+
"""
|
|
110
|
+
DesktopEnv with OpenAI Gym interface. It provides a desktop environment for setting and evaluating desktop automation tasks.
|
|
111
|
+
"""
|
|
112
|
+
def __init__(
        self,
        provider_name: str = "aws",
        region: Optional[str] = None,
        path_to_vm: Optional[str] = None,
        snapshot_name: str = "init_state",
        action_space: str = "computer_13",
        cache_dir: str = "cache",
        screen_size: Tuple[int, int] = (int(os.environ.get("SCREEN_WIDTH", 1920)), int(os.environ.get("SCREEN_HEIGHT", 1080))),
        headless: bool = False,
        require_a11y_tree: bool = True,
        require_terminal: bool = False,
        os_type: str = "Ubuntu",
        enable_proxy: bool = False,
        client_password: str = "",
):
    """
    Args:
        provider_name (str): virtualization provider name, default to "aws".
            Must be one of "docker", "aws", "gcp", "azure", "vmware", "virtualbox"
        region (str): the region for allocate machines, work for cloud services, default to None
        path_to_vm (str): path to the VM (e.g. a .vmx file); made absolute for
            "vmware"/"virtualbox". When empty, the VM manager is asked for a path
        snapshot_name (str): snapshot name to revert to, default to "init_state"
        action_space (str): "computer_13" | "pyautogui" | "claude_computer_use"
        cache_dir (str): cache directory to cache task-related stuffs like
            reference file for evaluation
        screen_size (Tuple[int, int]): (width, height) of the VM screen; the default
            is read from the SCREEN_WIDTH / SCREEN_HEIGHT environment variables
        headless (bool): whether to run the VM in headless mode
        require_a11y_tree (bool): whether to require accessibility tree
        require_terminal (bool): whether to require terminal output
        os_type (str): operating system type, default to "Ubuntu"
        enable_proxy (bool): whether to enable proxy support, default to False
        client_password (str): password of the VM user; when empty,
            "osworld-public-evaluation" is used for "aws" and "password" otherwise

    Raises:
        ValueError: if provider_name is not a supported provider
        AssertionError: if action_space is not a supported action space
        Exception: any error raised while starting the emulator is re-raised
            after a best-effort cleanup of the VM
    """
    # Initialize VM manager and virtualization provider
    self.region = region
    self.provider_name = provider_name
    self.enable_proxy = enable_proxy  # Store proxy enablement setting
    if client_password == "":
        if self.provider_name == "aws":
            self.client_password = "osworld-public-evaluation"
        else:
            self.client_password = "password"
    else:
        self.client_password = client_password

    self.screen_width = screen_size[0]
    self.screen_height = screen_size[1]

    # Default ports; _start_emulator overrides them when the provider reports others
    self.server_port = 5000
    self.chromium_port = 9222
    self.vnc_port = 8006
    self.vlc_port = 8080

    # Initialize with default (no proxy) provider
    self.current_use_proxy = False
    self.manager, self.provider = create_vm_manager_and_provider(provider_name, region, use_proxy=False)

    self.os_type = os_type

    # Track whether environment has been used (step/setup) to optimize snapshot revert
    # docker, aws, gcp, azure are always unused as the emulator starts from a clean state
    # vmware, virtualbox are always used as the emulator starts from a dirty state
    if self.provider_name in {"docker", "aws", "gcp", "azure"}:
        self.is_environment_used = False
    elif self.provider_name in {"vmware", "virtualbox"}:
        self.is_environment_used = True
    else:
        raise ValueError(f"Invalid provider name: {self.provider_name}")

    # Initialize environment variables
    if path_to_vm:
        self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \
            if provider_name in {"vmware", "virtualbox"} else path_to_vm
    else:
        self.path_to_vm = self.manager.get_vm_path(os_type=self.os_type, region=region, screen_size=(self.screen_width, self.screen_height))

    # From here on a VM path exists, so any failure must release it again.
    try:
        self.snapshot_name = snapshot_name
        self.cache_dir_base: str = cache_dir
        # todo: add the logic to get the screen size from the VM
        self.headless = headless
        self.require_a11y_tree = require_a11y_tree
        self.require_terminal = require_terminal

        # Initialize emulator and controller
        if provider_name != "docker":  # Check if this is applicable to other VM providers
            logger.info("Initializing...")
            self._start_emulator()

        # mode: human or machine
        self.instruction = None
        assert action_space in ["computer_13", "pyautogui", "claude_computer_use"]
        self.action_space = action_space  # todo: refactor it to the ActType

        # episodic stuffs, like counters, will be updated or reset
        # when calling self.reset()
        self._traj_no: int = -1
        self._step_no: int = 0
        self.action_history: List[Dict[str, Any]] = []
    except Exception as e:
        logger.error(f"Failed to initialize DesktopEnv: {e}")
        # If initialization fails, we should clean up the VM
        try:
            self.close()
            self.manager.delete_vm(self.path_to_vm, self.region)
            logger.info(f"Cleaned up VM {self.path_to_vm}.")
        except Exception as cleanup_error:
            # Cleanup is best effort; the original error is the one re-raised below.
            logger.error(f"Failed to clean up VM {self.path_to_vm}: {cleanup_error}")
        raise
|
|
221
|
+
|
|
222
|
+
def _start_emulator(self):
    """
    Power on the virtual machine and build the controllers that talk to it.

    The provider's address string is split on ':'. The first field is the VM
    IP; when more fields are present, fields 1-4 are parsed as the server,
    chromium, VNC and VLC ports (otherwise the current port values are kept).
    """
    self.provider.start_emulator(self.path_to_vm, self.headless, self.os_type)

    # Get the ip from the virtual machine, and setup the controller
    address_fields = self.provider.get_ip_address(self.path_to_vm).split(':')
    self.vm_ip = address_fields[0]
    if len(address_fields) > 1:
        self.server_port = int(address_fields[1])
        self.chromium_port = int(address_fields[2])
        self.vnc_port = int(address_fields[3])
        self.vlc_port = int(address_fields[4])

    self.controller = PythonController(vm_ip=self.vm_ip, server_port=self.server_port)
    setup_kwargs = dict(
        vm_ip=self.vm_ip,
        server_port=self.server_port,
        chromium_port=self.chromium_port,
        vlc_port=self.vlc_port,
        cache_dir=self.cache_dir_base,
        client_password=self.client_password,
        screen_width=self.screen_width,
        screen_height=self.screen_height,
    )
    self.setup_controller = SetupController(**setup_kwargs)
|
|
236
|
+
|
|
237
|
+
def _revert_to_snapshot(self):
|
|
238
|
+
# Revert to certain snapshot of the virtual machine, and refresh the path to vm and ip of vm
|
|
239
|
+
# due to the fact it could be changed when implemented by cloud services
|
|
240
|
+
path_to_vm = self.provider.revert_to_snapshot(self.path_to_vm, self.snapshot_name)
|
|
241
|
+
if path_to_vm and not path_to_vm == self.path_to_vm:
|
|
242
|
+
# path_to_vm has to be a new path
|
|
243
|
+
|
|
244
|
+
self.manager.delete_vm(self.path_to_vm, self.region)
|
|
245
|
+
self.manager.add_vm(path_to_vm, self.region)
|
|
246
|
+
self.manager.occupy_vm(path_to_vm, os.getpid(), self.region)
|
|
247
|
+
self.path_to_vm = path_to_vm
|
|
248
|
+
|
|
249
|
+
def _save_state(self, snapshot_name=None):
|
|
250
|
+
# Save the current virtual machine state to a certain snapshot name
|
|
251
|
+
self.provider.save_state(self.path_to_vm, snapshot_name)
|
|
252
|
+
|
|
253
|
+
def close(self):
    """Close (release) the virtual machine by stopping its emulator."""
    stop = self.provider.stop_emulator
    stop(self.path_to_vm)
|
|
256
|
+
|
|
257
|
+
def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
    """
    Reset the environment, optionally switching to a new task, and return the
    first observation.

    Args:
        task_config: task description; when given it is passed to
            ``_set_task_info`` and its setup steps are run (retried up to
            MAX_RETRIES times). When None only the counters are reset and the
            snapshot is reverted if the environment was used.
        seed: unused
        options: unused

    Returns:
        dict: the observation from ``_get_obs`` (only the observation is
        returned, not an ``(observation, info)`` pair).
    """
    # Reset to certain task in OSWorld
    logger.info("Resetting environment...")
    logger.info("Switching task...")
    logger.info("Setting counters...")
    self._traj_no += 1
    self._step_no = 0
    self.action_history.clear()

    setup_succeeded = False
    for attempt in range(MAX_RETRIES):
        # Check and handle proxy requirement changes BEFORE starting emulator
        if task_config is not None:
            # Only consider task proxy requirement if proxy is enabled at system level
            task_use_proxy = task_config.get("proxy", False) and self.enable_proxy
            if not self.enable_proxy and task_config.get("proxy", False):
                logger.info("Task requires proxy but proxy is disabled at system level, ignoring proxy requirement.")

            if task_use_proxy != self.current_use_proxy:
                # keep because get_info_from_website depend on this
                self.current_use_proxy = task_use_proxy

        # Only revert to snapshot if environment has been used (step/setup)
        # This optimization is especially important for cloud providers like AWS
        # where unnecessary snapshot operations are costly and time-consuming
        if self.is_environment_used:
            logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
            self._revert_to_snapshot()
            logger.info("Starting emulator...")
            self._start_emulator()
            logger.info("Emulator started.")
            # Reset the usage flag after reverting
            self.is_environment_used = False
        else:
            logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))

        if task_config is None:
            # Nothing to set up, so there is nothing to retry.
            setup_succeeded = True
            break

        if task_use_proxy:
            # If using proxy and proxy is enabled, set up the proxy configuration
            self.setup_controller._proxy_setup(self.client_password)
        self._set_task_info(task_config)
        self.setup_controller.reset_cache_dir(self.cache_dir)
        logger.info("Setting up environment...")
        success = self.setup_controller.setup(self.config, task_use_proxy)
        if success:
            # Mark environment as used when setup is successfully executed
            if self.config:  # Only mark as used if there were actual setup operations
                self.is_environment_used = True
            setup_succeeded = True
            break

        logger.error(
            "Environment setup failed, retrying (%d/%d)...",
            attempt + 1,
            MAX_RETRIES,
        )
        time.sleep(5)

    if setup_succeeded:
        logger.info("Environment setup complete.")
    else:
        # Previously this case was logged as "complete"; the observation is
        # still returned, but the failure is now visible in the log.
        logger.error(
            "Environment setup failed after %d attempts; the environment may be incomplete.",
            MAX_RETRIES,
        )

    observation = self._get_obs()
    return observation
|
|
320
|
+
|
|
321
|
+
def _get_obs(self):
|
|
322
|
+
# We provide screenshot, accessibility_tree (optional), terminal (optional), and instruction.
|
|
323
|
+
# can be customized and scaled
|
|
324
|
+
return {
|
|
325
|
+
"screenshot": self.controller.get_screenshot(),
|
|
326
|
+
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
|
|
327
|
+
"terminal": self.controller.get_terminal_output() if self.require_terminal else None,
|
|
328
|
+
"instruction": self.instruction
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
@property
def vm_platform(self):
    """Platform of the VM, as reported by the controller."""
    controller = self.controller
    return controller.get_vm_platform()
|
|
334
|
+
|
|
335
|
+
@property
def vm_screen_size(self):
    """Screen size of the VM, as reported by the controller."""
    controller = self.controller
    return controller.get_vm_screen_size()
|
|
338
|
+
|
|
339
|
+
def _set_task_info(self, task_config: Dict[str, Any]):
|
|
340
|
+
"""Set task info (proxy logic is handled in reset method)"""
|
|
341
|
+
self.task_id: str = task_config["id"]
|
|
342
|
+
self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
|
|
343
|
+
os.makedirs(self.cache_dir, exist_ok=True)
|
|
344
|
+
self.instruction = task_config["instruction"]
|
|
345
|
+
self.config = task_config["config"] if "config" in task_config else []
|
|
346
|
+
|
|
347
|
+
self._set_evaluator_info(task_config)
|
|
348
|
+
|
|
349
|
+
def _set_evaluator_info(self, task_config: Dict[str, Any]):
    """
    Set evaluator information from task config.

    Layout of ``task_config["evaluator"]``:
        func -> metric function string, or list of metric function strings
        conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or"
        result -> result getter config, or list of result getter configs
        expected (optional) -> expected getter config, or list of expected getter configs
        options (optional) -> metric options, or list of metric options

    If func is a list, then result, expected (if exists) and options (if exists)
    should also be lists of the same length; a metric that does not need an
    expected or options entry still gets one, set to None.
    """
    self.evaluator = task_config["evaluator"]
    evaluator = self.evaluator

    def resolve_getter(getter_config):
        # Getter functions are looked up by name: get_<type>
        return getattr(getters, "get_{:}".format(getter_config["type"]))

    # Metric function(s)
    func_spec = evaluator["func"]
    if isinstance(func_spec, list):
        self.metric = [getattr(metrics, func) for func in func_spec]
    else:
        self.metric = getattr(metrics, func_spec)
    self.metric_conj = evaluator.get("conj", "and")  # take conjunction of multiple metrics

    # Result getter(s)
    if "result" in evaluator and len(evaluator["result"]) > 0:
        result_spec = evaluator["result"]
        if isinstance(result_spec, list):
            self.result_getter = [resolve_getter(res) for res in result_spec]
        else:
            self.result_getter = resolve_getter(result_spec)
    elif isinstance(self.metric, list):
        self.result_getter = [None] * len(self.metric)
    else:
        self.result_getter = None

    # Expected getter(s); list entries may be falsy, which maps to None
    if "expected" in evaluator and len(evaluator["expected"]) > 0:
        expected_spec = evaluator["expected"]
        if isinstance(expected_spec, list):
            self.expected_getter = [resolve_getter(exp) if exp else None for exp in expected_spec]
        else:
            self.expected_getter = resolve_getter(expected_spec)
    elif isinstance(self.metric, list):
        self.expected_getter = [None] * len(self.metric)
    else:
        self.expected_getter = None

    # Metric options; falsy list entries are replaced by empty dicts
    if isinstance(evaluator.get("options", {}), list):
        self.metric_options = [opt if opt else {} for opt in evaluator["options"]]
    elif "options" in evaluator:
        self.metric_options = evaluator["options"]
    elif isinstance(self.metric, list):
        self.metric_options = [{}] * len(self.metric)
    else:
        self.metric_options = {}

    assert (not isinstance(self.evaluator["func"], list)
            or (len(self.metric) == len(self.result_getter) == len(self.expected_getter) == len(
                self.metric_options)))
|
|
395
|
+
|
|
396
|
+
def step(self, action, pause=2):
    """
    Execute one action in the VM and return the resulting observation.

    Args:
        action: 'WAIT', 'FAIL' or 'DONE'; a pyautogui command string; or a dict.
            A dict may carry an 'action_type' key (checked for the special
            actions) and, in the "pyautogui" / "claude_computer_use" action
            spaces, a 'command' key holding the pyautogui command.
        pause: seconds to sleep after the action, and the sleep used for WAIT

    Returns:
        tuple: ``(observation, reward, done, info)``. ``reward`` is always 0;
        ``done`` is True only for FAIL / DONE, with ``info`` set to
        ``{"fail": True}`` or ``{"done": True}``.
    """
    self._step_no += 1
    self.action_history.append(action)

    # Mark environment as used when step is called
    self.is_environment_used = True

    reward = 0  # todo: Define reward calculation for each example
    done = False  # todo: Define episode termination condition for each example
    info = {}
    logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")

    # handle the special actions
    # A special action is either the bare string or a dict whose 'action_type'
    # names it. .get() is used because dicts in the pyautogui action space may
    # carry only a 'command' key.
    special_action = action.get('action_type') if isinstance(action, dict) else action
    if special_action == 'WAIT':
        time.sleep(pause)
    elif special_action == 'FAIL':
        done = True
        info = {"fail": True}
    elif special_action == 'DONE':
        done = True
        info = {"done": True}

    if self.action_space == "computer_13":
        # the set of all possible actions defined in the action representation
        self.controller.execute_action(action)
    elif self.action_space == "pyautogui" or self.action_space == "claude_computer_use":
        if action in ['WAIT', 'FAIL', 'DONE']:
            self.controller.execute_action(action)
        else:
            # the set of all possible python commands insides `pyautogui`
            if isinstance(action, str):
                # Fix PyAutoGUI '<' character bug before execution
                fixed_command = _fix_pyautogui_less_than_bug(action)
                self.controller.execute_python_command(fixed_command)
            elif isinstance(action, dict):
                # Fix PyAutoGUI '<' character bug before execution
                fixed_command = _fix_pyautogui_less_than_bug(action['command'])
                self.controller.execute_python_command(fixed_command)

    time.sleep(pause)
    observation = self._get_obs()

    return observation, reward, done, info
|
|
439
|
+
|
|
440
|
+
def evaluate(self):
    """
    Evaluate whether the task is successfully completed.

    The evaluator's "postconfig" steps are run first. A task whose func is
    "infeasible" scores 1 only if the last action was FAIL; any other task
    scores 0 if the last action was FAIL. Otherwise the configured metric(s)
    are applied to the result state (and the expected state, when a getter
    for it exists).

    Returns:
        0 or 1 for the short-circuit cases; for several metrics the mean of
        the scores ("and") or the best score (otherwise); for a single metric
        whatever that metric returns. A result file that cannot be found
        counts as a score of 0 for that metric.
    """
    postconfig = self.evaluator.get("postconfig", [])
    self.setup_controller.setup(postconfig)
    # Mark environment as used if there were postconfig setup operations
    if postconfig:
        self.is_environment_used = True

    # FAIL may be recorded as the bare string or as a dict with that action_type.
    last_action = self.action_history[-1] if len(self.action_history) > 0 else None
    last_action_failed = last_action == "FAIL" or (
        isinstance(last_action, dict) and last_action.get("action_type") == "FAIL"
    )

    if self.evaluator['func'] == "infeasible":
        return 1 if last_action_failed else 0
    if last_action_failed:
        return 0

    has_expected = bool(
        "expected" in self.evaluator and self.expected_getter and self.evaluator["expected"]
    )

    if isinstance(self.metric, list):
        # Multiple metrics to evaluate whether the task is successfully completed
        results = []
        assert len(self.metric) == len(self.result_getter), "The number of metrics and result getters must be the same"
        if "expected" in self.evaluator:
            assert len(self.metric) == len(self.expected_getter), "The number of metrics and expected getters must be the same"
        for idx, metric_fn in enumerate(self.metric):
            try:
                config = self.evaluator["result"][idx]
                result_state = self.result_getter[idx](self, config)
            except FileNotFoundError:
                logger.error("File not found!")
                if self.metric_conj == 'and':
                    return 0
                # Without a result state this metric cannot pass; record 0
                # and let the remaining metrics decide.
                results.append(0)
                continue

            # Individual expected entries may be None for metrics that do not need one.
            expected_getter = self.expected_getter[idx] if has_expected else None
            if expected_getter is not None:
                expected_state = expected_getter(self, self.evaluator["expected"][idx])
                score = metric_fn(result_state, expected_state, **self.metric_options[idx])
            else:
                score = metric_fn(result_state, **self.metric_options[idx])

            if self.metric_conj == 'and' and float(score) == 0.0:
                return 0
            elif self.metric_conj == 'or' and float(score) == 1.0:
                return 1
            else:
                results.append(score)

        return sum(results) / len(results) if self.metric_conj == 'and' else max(results)

    # Single metric to evaluate whether the task is successfully completed
    try:
        result_state = self.result_getter(self, self.evaluator["result"])
    except FileNotFoundError:
        logger.error("File not found!")
        return 0

    if has_expected:
        expected_state = self.expected_getter(self, self.evaluator["expected"])
        return self.metric(result_state, expected_state, **self.metric_options)
    return self.metric(result_state, **self.metric_options)
|
|
504
|
+
|
|
505
|
+
def render(self, mode='rgb_array'):
    """
    Return the current screenshot from the controller.

    Raises:
        ValueError: for any mode other than 'rgb_array'.
    """
    if mode != 'rgb_array':
        raise ValueError('Unsupported render mode: {}'.format(mode))
    return self.controller.get_screenshot()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from .chrome import (
|
|
2
|
+
get_default_search_engine,
|
|
3
|
+
get_cookie_data,
|
|
4
|
+
get_bookmarks,
|
|
5
|
+
get_open_tabs_info,
|
|
6
|
+
get_pdf_from_url,
|
|
7
|
+
get_shortcuts_on_desktop,
|
|
8
|
+
get_history,
|
|
9
|
+
get_page_info,
|
|
10
|
+
get_enabled_experiments,
|
|
11
|
+
get_chrome_language,
|
|
12
|
+
get_chrome_font_size,
|
|
13
|
+
get_profile_name,
|
|
14
|
+
get_number_of_search_results,
|
|
15
|
+
get_googledrive_file,
|
|
16
|
+
get_active_tab_info,
|
|
17
|
+
get_enable_do_not_track,
|
|
18
|
+
get_enable_enhanced_safety_browsing,
|
|
19
|
+
get_new_startup_page,
|
|
20
|
+
get_find_unpacked_extension_path,
|
|
21
|
+
get_data_delete_automacally,
|
|
22
|
+
get_active_tab_html_parse,
|
|
23
|
+
get_active_tab_url_parse,
|
|
24
|
+
get_gotoRecreationPage_and_get_html_content,
|
|
25
|
+
get_url_dashPart,
|
|
26
|
+
get_active_url_from_accessTree,
|
|
27
|
+
get_find_installed_extension_name,
|
|
28
|
+
get_info_from_website,
|
|
29
|
+
get_macys_product_url_parse,
|
|
30
|
+
get_url_path_parse # Alias for backward compatibility
|
|
31
|
+
)
|
|
32
|
+
from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
|
|
33
|
+
from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command_error
|
|
34
|
+
from .gimp import get_gimp_config_file
|
|
35
|
+
from .impress import get_audio_in_slide, get_background_image_in_slide
|
|
36
|
+
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
|
|
37
|
+
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
|
|
38
|
+
from .replay import get_replay
|
|
39
|
+
from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player
|
|
40
|
+
from .vscode import get_vscode_config
|
|
41
|
+
from .calc import get_conference_city_in_order
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_conference_city_in_order(env, config):
    """
    Read a csv file and return the third column of every data row, in row order.

    Args:
        env: unused
        config: dict with key 'csv_path', the path of the csv file to read

    Returns:
        list: the third-column value of each row after the header row.
        Blank rows are skipped; an empty file yields an empty list.

    Raises:
        IndexError: if a non-blank data row has fewer than three columns
    """
    csv_path = config['csv_path']
    print(f"Reading csv file from {csv_path}")
    # newline='' is what the csv module expects so that it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open(csv_path, 'r', newline='') as f:
        reader = csv.reader(f)
        # skip the header row (the default keeps an empty file from raising)
        next(reader, None)
        # csv.reader yields [] for blank lines; those carry no city
        conference_city_list = [row[2] for row in reader if row]
    return conference_city_list
|