lybic-guiagents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (85) hide show
  1. desktop_env/__init__.py +1 -0
  2. desktop_env/actions.py +203 -0
  3. desktop_env/controllers/__init__.py +0 -0
  4. desktop_env/controllers/python.py +471 -0
  5. desktop_env/controllers/setup.py +882 -0
  6. desktop_env/desktop_env.py +509 -0
  7. desktop_env/evaluators/__init__.py +5 -0
  8. desktop_env/evaluators/getters/__init__.py +41 -0
  9. desktop_env/evaluators/getters/calc.py +15 -0
  10. desktop_env/evaluators/getters/chrome.py +1774 -0
  11. desktop_env/evaluators/getters/file.py +154 -0
  12. desktop_env/evaluators/getters/general.py +42 -0
  13. desktop_env/evaluators/getters/gimp.py +38 -0
  14. desktop_env/evaluators/getters/impress.py +126 -0
  15. desktop_env/evaluators/getters/info.py +24 -0
  16. desktop_env/evaluators/getters/misc.py +406 -0
  17. desktop_env/evaluators/getters/replay.py +20 -0
  18. desktop_env/evaluators/getters/vlc.py +86 -0
  19. desktop_env/evaluators/getters/vscode.py +35 -0
  20. desktop_env/evaluators/metrics/__init__.py +160 -0
  21. desktop_env/evaluators/metrics/basic_os.py +68 -0
  22. desktop_env/evaluators/metrics/chrome.py +493 -0
  23. desktop_env/evaluators/metrics/docs.py +1011 -0
  24. desktop_env/evaluators/metrics/general.py +665 -0
  25. desktop_env/evaluators/metrics/gimp.py +637 -0
  26. desktop_env/evaluators/metrics/libreoffice.py +28 -0
  27. desktop_env/evaluators/metrics/others.py +92 -0
  28. desktop_env/evaluators/metrics/pdf.py +31 -0
  29. desktop_env/evaluators/metrics/slides.py +957 -0
  30. desktop_env/evaluators/metrics/table.py +585 -0
  31. desktop_env/evaluators/metrics/thunderbird.py +176 -0
  32. desktop_env/evaluators/metrics/utils.py +719 -0
  33. desktop_env/evaluators/metrics/vlc.py +524 -0
  34. desktop_env/evaluators/metrics/vscode.py +283 -0
  35. desktop_env/providers/__init__.py +35 -0
  36. desktop_env/providers/aws/__init__.py +0 -0
  37. desktop_env/providers/aws/manager.py +278 -0
  38. desktop_env/providers/aws/provider.py +186 -0
  39. desktop_env/providers/aws/provider_with_proxy.py +315 -0
  40. desktop_env/providers/aws/proxy_pool.py +193 -0
  41. desktop_env/providers/azure/__init__.py +0 -0
  42. desktop_env/providers/azure/manager.py +87 -0
  43. desktop_env/providers/azure/provider.py +207 -0
  44. desktop_env/providers/base.py +97 -0
  45. desktop_env/providers/gcp/__init__.py +0 -0
  46. desktop_env/providers/gcp/manager.py +0 -0
  47. desktop_env/providers/gcp/provider.py +0 -0
  48. desktop_env/providers/virtualbox/__init__.py +0 -0
  49. desktop_env/providers/virtualbox/manager.py +463 -0
  50. desktop_env/providers/virtualbox/provider.py +124 -0
  51. desktop_env/providers/vmware/__init__.py +0 -0
  52. desktop_env/providers/vmware/manager.py +455 -0
  53. desktop_env/providers/vmware/provider.py +105 -0
  54. gui_agents/__init__.py +0 -0
  55. gui_agents/agents/Action.py +209 -0
  56. gui_agents/agents/__init__.py +0 -0
  57. gui_agents/agents/agent_s.py +832 -0
  58. gui_agents/agents/global_state.py +610 -0
  59. gui_agents/agents/grounding.py +651 -0
  60. gui_agents/agents/hardware_interface.py +129 -0
  61. gui_agents/agents/manager.py +568 -0
  62. gui_agents/agents/translator.py +132 -0
  63. gui_agents/agents/worker.py +355 -0
  64. gui_agents/cli_app.py +560 -0
  65. gui_agents/core/__init__.py +0 -0
  66. gui_agents/core/engine.py +1496 -0
  67. gui_agents/core/knowledge.py +449 -0
  68. gui_agents/core/mllm.py +555 -0
  69. gui_agents/tools/__init__.py +0 -0
  70. gui_agents/tools/tools.py +727 -0
  71. gui_agents/unit_test/__init__.py +0 -0
  72. gui_agents/unit_test/run_tests.py +65 -0
  73. gui_agents/unit_test/test_manager.py +330 -0
  74. gui_agents/unit_test/test_worker.py +269 -0
  75. gui_agents/utils/__init__.py +0 -0
  76. gui_agents/utils/analyze_display.py +301 -0
  77. gui_agents/utils/common_utils.py +263 -0
  78. gui_agents/utils/display_viewer.py +281 -0
  79. gui_agents/utils/embedding_manager.py +53 -0
  80. gui_agents/utils/image_axis_utils.py +27 -0
  81. lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
  82. lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
  83. lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
  84. lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,509 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import time
6
+ import re
7
+ from typing import Callable, Any, Optional, Tuple
8
+ from typing import List, Dict, Union
9
+
10
+ import gymnasium as gym
11
+
12
+ from desktop_env.controllers.python import PythonController
13
+ from desktop_env.controllers.setup import SetupController
14
+ from desktop_env.evaluators import metrics, getters
15
+ from desktop_env.providers import create_vm_manager_and_provider
16
+
17
+ logger = logging.getLogger("desktopenv.env")
18
+
19
+ Metric = Callable[[Any, Any], float]
20
+ Getter = Callable[[gym.Env, Dict[str, Any]], Any]
21
+
22
+ MAX_RETRIES = 5 # Maximum retries for environment setup
23
+
24
+
25
+
26
+ def _fix_pyautogui_less_than_bug(command: str) -> str:
27
+ """
28
+ Fix PyAutoGUI '<' character bug by converting it to hotkey("shift", ',') calls.
29
+
30
+ This fixes the known PyAutoGUI issue where typing '<' produces '>' instead.
31
+ References:
32
+ - https://github.com/asweigart/pyautogui/issues/198
33
+ - https://github.com/xlang-ai/OSWorld/issues/257
34
+
35
+ Args:
36
+ command (str): The original pyautogui command
37
+
38
+ Returns:
39
+ str: The fixed command with '<' characters handled properly
40
+ """
41
+ # Handle typewrite with '<' characters
42
+ def replace_typewrite_less_than(match):
43
+ content = match.group(1)
44
+ # Split the content by '<' and rebuild with hotkey calls
45
+ parts = content.split('<')
46
+ if len(parts) == 1:
47
+ # No '<' found, return original
48
+ return match.group(0)
49
+
50
+ # Rebuild the command
51
+ result_parts = []
52
+ for i, part in enumerate(parts):
53
+ if i == 0:
54
+ # First part, just add typewrite if not empty
55
+ if part:
56
+ result_parts.append(f"pyautogui.typewrite({repr(part)})")
57
+ else:
58
+ # Add hotkey for '<' and then typewrite for the rest if not empty
59
+ result_parts.append('pyautogui.hotkey("shift", ",")')
60
+ if part:
61
+ result_parts.append(f"pyautogui.typewrite({repr(part)})")
62
+
63
+ return '; '.join(result_parts)
64
+
65
+ # Handle press('<') calls
66
+ def replace_press_less_than(match):
67
+ return 'pyautogui.hotkey("shift", ",")'
68
+
69
+ # Pattern to match typewrite calls with quoted strings
70
+ typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'
71
+ # Pattern to match press('<') calls
72
+ press_pattern = r'pyautogui\.press\(["\']<["\']\)'
73
+
74
+ # First handle press('<') calls
75
+ command = re.sub(press_pattern, replace_press_less_than, command)
76
+
77
+ # Then handle typewrite calls
78
+ def process_typewrite_match(match):
79
+ quote_char = match.group(1)
80
+ content = match.group(2)
81
+
82
+ # Check if content contains '<'
83
+ if '<' not in content:
84
+ return match.group(0)
85
+
86
+ # Split by '<' and rebuild
87
+ parts = content.split('<')
88
+ result_parts = []
89
+
90
+ for i, part in enumerate(parts):
91
+ if i == 0:
92
+ # First part
93
+ if part:
94
+ result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
95
+ else:
96
+ # Add hotkey for '<' and then typewrite for the rest
97
+ result_parts.append('pyautogui.hotkey("shift", ",")')
98
+ if part:
99
+ result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
100
+
101
+ return '; '.join(result_parts)
102
+
103
+ command = re.sub(typewrite_pattern, process_typewrite_match, command)
104
+
105
+ return command
106
+
107
+
108
+ class DesktopEnv(gym.Env):
109
+ """
110
+ DesktopEnv with OpenAI Gym interface. It provides a desktop environment for setting and evaluating desktop automation tasks.
111
+ """
112
def __init__(
    self,
    provider_name: str = "aws",
    region: Optional[str] = None,
    path_to_vm: Optional[str] = None,
    snapshot_name: str = "init_state",
    action_space: str = "computer_13",
    cache_dir: str = "cache",
    screen_size: Tuple[int, int] = (int(os.environ.get("SCREEN_WIDTH", 1920)), int(os.environ.get("SCREEN_HEIGHT", 1080))),
    headless: bool = False,
    require_a11y_tree: bool = True,
    require_terminal: bool = False,
    os_type: str = "Ubuntu",
    enable_proxy: bool = False,
    client_password: str = "",
):
    """
    Args:
        provider_name (str): virtualization provider name, default to "aws".
            Accepted values are "docker", "aws", "gcp", "azure", "vmware"
            and "virtualbox"; anything else raises ValueError.
        region (str): the region for allocate machines, work for cloud services,
            default to None (passed through to the provider unchanged)
        path_to_vm (str): path to the VM (e.g. a .vmx file); when omitted the
            VM manager is asked for one
        snapshot_name (str): snapshot name to revert to, default to "init_state"
        action_space (str): "computer_13" | "pyautogui" | "claude_computer_use"
        cache_dir (str): cache directory to cache task-related stuffs like
            reference file for evaluation
        screen_size (Tuple[int, int]): (width, height) of the VM screen; the
            default is read from SCREEN_WIDTH / SCREEN_HEIGHT when the class
            is defined, falling back to 1920x1080
        headless (bool): whether to run the VM in headless mode
        require_a11y_tree (bool): whether to require accessibility tree
        require_terminal (bool): whether to require terminal output
        os_type (str): operating system type, default to "Ubuntu"
        enable_proxy (bool): whether to enable proxy support, default to False
        client_password (str): password used inside the VM; when empty a
            provider-specific default is used

    Raises:
        AssertionError: if ``action_space`` is not one of the supported values.
        ValueError: if ``provider_name`` is not a known provider.
    """
    # Validate before any VM is allocated or booted, so that a caller typo
    # does not trigger the boot-then-delete cleanup path below.
    assert action_space in ["computer_13", "pyautogui", "claude_computer_use"]

    # Initialize VM manager and virtualization provider
    self.region = region
    self.provider_name = provider_name
    self.enable_proxy = enable_proxy  # Store proxy enablement setting
    if client_password == "":
        # Provider-specific default credentials.
        if self.provider_name == "aws":
            self.client_password = "osworld-public-evaluation"
        else:
            self.client_password = "password"
    else:
        self.client_password = client_password

    self.screen_width = screen_size[0]
    self.screen_height = screen_size[1]

    # Default ports; _start_emulator may override them from the provider
    self.server_port = 5000
    self.chromium_port = 9222
    self.vnc_port = 8006
    self.vlc_port = 8080

    # Initialize with default (no proxy) provider
    self.current_use_proxy = False
    self.manager, self.provider = create_vm_manager_and_provider(provider_name, region, use_proxy=False)

    self.os_type = os_type

    # Track whether environment has been used (step/setup) to optimize snapshot revert
    # docker, aws, gcp, azure are always unused as the emulator starts from a clean state
    # vmware, virtualbox are always used as the emulator starts from a dirty state
    if self.provider_name in {"docker", "aws", "gcp", "azure"}:
        self.is_environment_used = False
    elif self.provider_name in {"vmware", "virtualbox"}:
        self.is_environment_used = True
    else:
        raise ValueError(f"Invalid provider name: {self.provider_name}")

    # Initialize environment variables
    if path_to_vm:
        # Only local hypervisors take a filesystem path that needs expanding.
        if provider_name in {"vmware", "virtualbox"}:
            self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm)))
        else:
            self.path_to_vm = path_to_vm
    else:
        self.path_to_vm = self.manager.get_vm_path(os_type=self.os_type, region=region, screen_size=(self.screen_width, self.screen_height))

    try:
        self.snapshot_name = snapshot_name
        self.cache_dir_base: str = cache_dir
        # todo: add the logic to get the screen size from the VM
        self.headless = headless
        self.require_a11y_tree = require_a11y_tree
        self.require_terminal = require_terminal

        # Initialize emulator and controller
        if provider_name != "docker":  # Check if this is applicable to other VM providers
            logger.info("Initializing...")
            self._start_emulator()

        # mode: human or machine
        self.instruction = None
        self.action_space = action_space  # todo: refactor it to the ActType

        # episodic stuffs, like counters, will be updated or reset
        # when calling self.reset()
        self._traj_no: int = -1
        self._step_no: int = 0
        self.action_history: List[Dict[str, Any]] = []
    except Exception as e:
        logger.error(f"Failed to initialize DesktopEnv: {e}")
        # If initialization fails, we should clean up the VM
        try:
            self.close()
            self.manager.delete_vm(self.path_to_vm, self.region)
            logger.info(f"Cleaned up VM {self.path_to_vm}.")
        except Exception as cleanup_error:
            logger.error(f"Failed to clean up VM {self.path_to_vm}: {cleanup_error}")
        raise
221
+
222
def _start_emulator(self):
    """Power on the VM and (re)create the controllers that talk to it.

    The provider's address string is split on ':'. The first field is the
    VM IP; when more fields follow they are read, in order, as the server,
    chromium, VNC and VLC ports and replace the current values.
    """
    self.provider.start_emulator(self.path_to_vm, self.headless, self.os_type)

    address = self.provider.get_ip_address(self.path_to_vm)
    host, *ports = address.split(':')
    self.vm_ip = host
    if ports:
        self.server_port = int(ports[0])
        self.chromium_port = int(ports[1])
        self.vnc_port = int(ports[2])
        self.vlc_port = int(ports[3])

    self.controller = PythonController(vm_ip=self.vm_ip, server_port=self.server_port)
    self.setup_controller = SetupController(
        vm_ip=self.vm_ip,
        server_port=self.server_port,
        chromium_port=self.chromium_port,
        vlc_port=self.vlc_port,
        cache_dir=self.cache_dir_base,
        client_password=self.client_password,
        screen_width=self.screen_width,
        screen_height=self.screen_height,
    )
236
+
237
+ def _revert_to_snapshot(self):
238
+ # Revert to certain snapshot of the virtual machine, and refresh the path to vm and ip of vm
239
+ # due to the fact it could be changed when implemented by cloud services
240
+ path_to_vm = self.provider.revert_to_snapshot(self.path_to_vm, self.snapshot_name)
241
+ if path_to_vm and not path_to_vm == self.path_to_vm:
242
+ # path_to_vm has to be a new path
243
+
244
+ self.manager.delete_vm(self.path_to_vm, self.region)
245
+ self.manager.add_vm(path_to_vm, self.region)
246
+ self.manager.occupy_vm(path_to_vm, os.getpid(), self.region)
247
+ self.path_to_vm = path_to_vm
248
+
249
+ def _save_state(self, snapshot_name=None):
250
+ # Save the current virtual machine state to a certain snapshot name
251
+ self.provider.save_state(self.path_to_vm, snapshot_name)
252
+
253
+ def close(self):
254
+ # Close (release) the virtual machine
255
+ self.provider.stop_emulator(self.path_to_vm)
256
+
257
def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
    """Reset episode counters, optionally load a task, and return an observation.

    Args:
        task_config: task description; when given, its setup steps are run
            (retried up to MAX_RETRIES times). When None, only the counters
            are reset and a used environment is reverted.
        seed: accepted for Gym signature compatibility; not used here.
        options: accepted for Gym signature compatibility; not used here.

    Returns:
        The observation dict produced by ``_get_obs``. It is returned even
        when every setup attempt failed; that case is logged as an error.
    """
    # Reset to certain task in OSWorld
    logger.info("Resetting environment...")
    logger.info("Switching task...")
    logger.info("Setting counters...")
    self._traj_no += 1
    self._step_no = 0
    self.action_history.clear()

    # The proxy decision depends only on the task and the system-level
    # switch, so it is computed once instead of on every retry.
    use_proxy = False
    if task_config is not None:
        task_wants_proxy = task_config.get("proxy", False)
        # Only consider task proxy requirement if proxy is enabled at system level
        use_proxy = task_wants_proxy and self.enable_proxy
        if not self.enable_proxy and task_wants_proxy:
            logger.info("Task requires proxy but proxy is disabled at system level, ignoring proxy requirement.")

        if use_proxy != self.current_use_proxy:
            # keep because get_info_from_website depend on this
            self.current_use_proxy = use_proxy

    setup_succeeded = True
    for attempt in range(MAX_RETRIES):
        # Only revert to snapshot if environment has been used (step/setup)
        # This optimization is especially important for cloud providers like AWS
        # where unnecessary snapshot operations are costly and time-consuming
        if self.is_environment_used:
            logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
            self._revert_to_snapshot()
            logger.info("Starting emulator...")
            self._start_emulator()
            logger.info("Emulator started.")
            # Reset the usage flag after reverting
            self.is_environment_used = False
        else:
            logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))

        if task_config is None:
            # Nothing to set up.
            break

        if use_proxy:
            # If using proxy and proxy is enabled, set up the proxy configuration
            self.setup_controller._proxy_setup(self.client_password)
        self._set_task_info(task_config)
        self.setup_controller.reset_cache_dir(self.cache_dir)
        logger.info("Setting up environment...")
        if self.setup_controller.setup(self.config, use_proxy):
            # Mark environment as used when setup is successfully executed
            if self.config:  # Only mark as used if there were actual setup operations
                self.is_environment_used = True
            break

        logger.error(
            "Environment setup failed, retrying (%d/%d)...",
            attempt + 1,
            MAX_RETRIES,
        )
        # No point in waiting after the last attempt.
        if attempt + 1 < MAX_RETRIES:
            time.sleep(5)
    else:
        # The loop ran out of attempts without a successful setup.
        setup_succeeded = False

    if setup_succeeded:
        logger.info("Environment setup complete.")
    else:
        logger.error(
            "Environment setup did not succeed after %d attempts; returning the current observation anyway.",
            MAX_RETRIES,
        )

    return self._get_obs()
320
+
321
+ def _get_obs(self):
322
+ # We provide screenshot, accessibility_tree (optional), terminal (optional), and instruction.
323
+ # can be customized and scaled
324
+ return {
325
+ "screenshot": self.controller.get_screenshot(),
326
+ "accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
327
+ "terminal": self.controller.get_terminal_output() if self.require_terminal else None,
328
+ "instruction": self.instruction
329
+ }
330
+
331
+ @property
332
+ def vm_platform(self):
333
+ return self.controller.get_vm_platform()
334
+
335
+ @property
336
+ def vm_screen_size(self):
337
+ return self.controller.get_vm_screen_size()
338
+
339
def _set_task_info(self, task_config: Dict[str, Any]):
    """Load the task id, cache directory, instruction, setup config and evaluator.

    Proxy handling is done by ``reset``, not here. The per-task cache
    directory ``<cache_dir_base>/<task id>`` is created if missing.
    """
    task_id = task_config["id"]
    self.task_id: str = task_id

    task_cache_dir = os.path.join(self.cache_dir_base, task_id)
    self.cache_dir: str = task_cache_dir
    os.makedirs(task_cache_dir, exist_ok=True)

    self.instruction = task_config["instruction"]
    # A task without setup steps gets an empty config list.
    self.config = task_config.get("config", [])

    self._set_evaluator_info(task_config)
348
+
349
def _set_evaluator_info(self, task_config: Dict[str, Any]):
    """Resolve the task's evaluator spec into callables and options.

    The evaluator dict has these fields:
      func     -> metric function name, or list of metric function names
      conj     -> "and"/"or", how multiple metrics are combined (default "and")
      result   -> result getter config, or list of result getter configs
      expected -> (optional) expected getter config, or list of them
      options  -> (optional) metric options, or list of metric options

    If func is a list, then result, expected (if present) and options (if
    present) should be lists of the same length; a metric that needs no
    expected/options entry still gets a None placeholder in the list.
    Metric names are looked up on ``metrics`` and getter types on
    ``getters`` as ``get_<type>``.
    """
    evaluator = task_config["evaluator"]
    self.evaluator = evaluator

    # Metric function(s).
    if isinstance(evaluator["func"], list):
        self.metric = [getattr(metrics, func) for func in evaluator["func"]]
    else:
        self.metric = getattr(metrics, evaluator["func"])
    self.metric_conj: str = evaluator.get("conj", "and")  # take conjunction of multiple metrics

    # Result getter(s).
    if "result" in evaluator and len(evaluator["result"]) > 0:
        if isinstance(evaluator["result"], list):
            self.result_getter = [
                getattr(getters, "get_{:}".format(res["type"])) for res in evaluator["result"]
            ]
        else:
            self.result_getter = getattr(getters, "get_{:}".format(evaluator["result"]["type"]))
    elif isinstance(self.metric, list):
        self.result_getter = [None] * len(self.metric)
    else:
        self.result_getter = None

    # Expected getter(s); falsy list entries become None placeholders.
    if "expected" in evaluator and len(evaluator["expected"]) > 0:
        if isinstance(evaluator["expected"], list):
            self.expected_getter = [
                getattr(getters, "get_{:}".format(exp["type"])) if exp else None
                for exp in evaluator["expected"]
            ]
        else:
            self.expected_getter = getattr(getters, "get_{:}".format(evaluator["expected"]["type"]))
    elif isinstance(self.metric, list):
        self.expected_getter = [None] * len(self.metric)
    else:
        self.expected_getter = None

    # Metric options; falsy list entries become empty dicts.
    if isinstance(evaluator.get("options", {}), list):
        self.metric_options = [opt if opt else {} for opt in evaluator["options"]]
    elif "options" in evaluator:
        self.metric_options = evaluator["options"]
    elif isinstance(self.metric, list):
        self.metric_options = [{}] * len(self.metric)
    else:
        self.metric_options = {}

    # With several metrics, every per-metric list must line up.
    assert (not isinstance(evaluator["func"], list)
            or (len(self.metric) == len(self.result_getter) == len(self.expected_getter) == len(
                self.metric_options)))
395
+
396
def step(self, action, pause=2):
    """Execute one action in the VM and return ``(observation, reward, done, info)``.

    Args:
        action: either a string or a dict. The control actions 'WAIT',
            'FAIL' and 'DONE' may be given as the bare string or as a dict
            whose 'action_type' is one of them. In the "pyautogui" and
            "claude_computer_use" action spaces any other string is run as
            a Python command, and any other dict must carry the command
            under 'command'.
        pause: seconds to sleep after the action (and for a WAIT action).

    Returns:
        A 4-tuple of the new observation, the reward (always 0 for now),
        whether the episode ended (True after FAIL or DONE) and an info
        dict ({"fail": True} / {"done": True} for those two actions).
    """
    self._step_no += 1
    self.action_history.append(action)

    # Mark environment as used when step is called
    self.is_environment_used = True

    reward = 0  # todo: Define reward calculation for each example
    done = False  # todo: Define episode termination condition for each example
    info = {}
    logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")

    # Normalise the two spellings of a control action. Using .get() keeps
    # dict actions without an 'action_type' key (e.g. {'command': ...})
    # from raising KeyError here.
    if isinstance(action, dict):
        action_type = action.get('action_type')
    else:
        action_type = action
    is_special = action_type in ('WAIT', 'FAIL', 'DONE')

    # handle the special actions
    if is_special:
        if action_type == 'WAIT':
            time.sleep(pause)
        elif action_type == 'FAIL':
            done = True
            info = {"fail": True}
        else:  # 'DONE'
            done = True
            info = {"done": True}

    if self.action_space == "computer_13":
        # the set of all possible actions defined in the action representation
        self.controller.execute_action(action)
    elif self.action_space == "pyautogui" or self.action_space == "claude_computer_use":
        if is_special:
            self.controller.execute_action(action)
        else:
            # the set of all possible python commands insides `pyautogui`
            if isinstance(action, str):
                # Fix PyAutoGUI '<' character bug before execution
                fixed_command = _fix_pyautogui_less_than_bug(action)
                self.controller.execute_python_command(fixed_command)
            elif isinstance(action, dict):
                # Fix PyAutoGUI '<' character bug before execution
                fixed_command = _fix_pyautogui_less_than_bug(action['command'])
                self.controller.execute_python_command(fixed_command)

    time.sleep(pause)
    observation = self._get_obs()

    return observation, reward, done, info
439
+
440
+ def evaluate(self):
441
+ """
442
+ Evaluate whether the task is successfully completed.
443
+ """
444
+
445
+ postconfig = self.evaluator.get("postconfig", [])
446
+ self.setup_controller.setup(postconfig)
447
+ # Mark environment as used if there were postconfig setup operations
448
+ if postconfig:
449
+ self.is_environment_used = True
450
+
451
+ if self.evaluator['func'] == "infeasible":
452
+ if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
453
+ return 1
454
+ else:
455
+ return 0
456
+ else:
457
+ if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
458
+ return 0
459
+
460
+ if type(self.metric) == list:
461
+ # Multiple metrics to evaluate whether the task is successfully completed
462
+ results = []
463
+ assert len(self.metric) == len(self.result_getter), "The number of metrics and result getters must be the same"
464
+ if "expected" in self.evaluator:
465
+ assert len(self.metric) == len(self.expected_getter), "The number of metrics and expected getters must be the same"
466
+ for idx, metric in enumerate(self.metric):
467
+ try:
468
+ config = self.evaluator["result"][idx]
469
+ result_state = self.result_getter[idx](self, config)
470
+ except FileNotFoundError:
471
+ logger.error("File not found!")
472
+ if self.metric_conj == 'and':
473
+ return 0
474
+
475
+ if "expected" in self.evaluator and self.expected_getter and self.evaluator["expected"]:
476
+ expected_state = self.expected_getter[idx](self, self.evaluator["expected"][idx])
477
+ metric: int = metric(result_state, expected_state, **self.metric_options[idx])
478
+ else:
479
+ metric: int = metric(result_state, **self.metric_options[idx])
480
+
481
+ if self.metric_conj == 'and' and float(metric) == 0.0:
482
+ return 0
483
+ elif self.metric_conj == 'or' and float(metric) == 1.0:
484
+ return 1
485
+ else:
486
+ results.append(metric)
487
+
488
+ return sum(results) / len(results) if self.metric_conj == 'and' else max(results)
489
+ else:
490
+ # Single metric to evaluate whether the task is successfully completed
491
+ try:
492
+ result_state = self.result_getter(self, self.evaluator["result"])
493
+ except FileNotFoundError:
494
+ logger.error("File not found!")
495
+ return 0
496
+
497
+ if "expected" in self.evaluator and self.expected_getter and self.evaluator["expected"]:
498
+ expected_state = self.expected_getter(self, self.evaluator["expected"])
499
+ metric: float = self.metric(result_state, expected_state, **self.metric_options)
500
+ else:
501
+ metric: float = self.metric(result_state, **self.metric_options)
502
+
503
+ return metric
504
+
505
+ def render(self, mode='rgb_array'):
506
+ if mode == 'rgb_array':
507
+ return self.controller.get_screenshot()
508
+ else:
509
+ raise ValueError('Unsupported render mode: {}'.format(mode))
@@ -0,0 +1,5 @@
1
+ #from .table import compare_table
2
+
3
+ #eval_funcs = {
4
+ #"compare_table(expected, actual)": compare_table
5
+ #}
@@ -0,0 +1,41 @@
1
+ from .chrome import (
2
+ get_default_search_engine,
3
+ get_cookie_data,
4
+ get_bookmarks,
5
+ get_open_tabs_info,
6
+ get_pdf_from_url,
7
+ get_shortcuts_on_desktop,
8
+ get_history,
9
+ get_page_info,
10
+ get_enabled_experiments,
11
+ get_chrome_language,
12
+ get_chrome_font_size,
13
+ get_profile_name,
14
+ get_number_of_search_results,
15
+ get_googledrive_file,
16
+ get_active_tab_info,
17
+ get_enable_do_not_track,
18
+ get_enable_enhanced_safety_browsing,
19
+ get_new_startup_page,
20
+ get_find_unpacked_extension_path,
21
+ get_data_delete_automacally,
22
+ get_active_tab_html_parse,
23
+ get_active_tab_url_parse,
24
+ get_gotoRecreationPage_and_get_html_content,
25
+ get_url_dashPart,
26
+ get_active_url_from_accessTree,
27
+ get_find_installed_extension_name,
28
+ get_info_from_website,
29
+ get_macys_product_url_parse,
30
+ get_url_path_parse # Alias for backward compatibility
31
+ )
32
+ from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
33
+ from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command_error
34
+ from .gimp import get_gimp_config_file
35
+ from .impress import get_audio_in_slide, get_background_image_in_slide
36
+ from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
37
+ from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
38
+ from .replay import get_replay
39
+ from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player
40
+ from .vscode import get_vscode_config
41
+ from .calc import get_conference_city_in_order
@@ -0,0 +1,15 @@
1
+ import csv
2
+
3
+
4
+ # I want to write a function, reads a csv file, and get all the contents in the third column in the order of rows
5
def get_conference_city_in_order(env, config):
    """Return the third-column values of a CSV file, in row order.

    Args:
        env: unused; present so the function matches the getter call shape.
        config: dict with key 'csv_path', the path of the CSV file to read.

    Returns:
        A list with the third field of every data row. The first row is
        treated as a header and skipped; blank lines are ignored. An empty
        file yields an empty list.

    Raises:
        IndexError: if a non-blank data row has fewer than three columns.
    """
    csv_path = config['csv_path']
    print(f"Reading csv file from {csv_path}")
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing newlines are read back unchanged.
    with open(csv_path, 'r', newline='') as f:
        reader = csv.reader(f)
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(reader, None)
        # csv.reader yields [] for blank lines, which carry no city.
        return [row[2] for row in reader if row]