lybic-guiagents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (85) hide show
  1. desktop_env/__init__.py +1 -0
  2. desktop_env/actions.py +203 -0
  3. desktop_env/controllers/__init__.py +0 -0
  4. desktop_env/controllers/python.py +471 -0
  5. desktop_env/controllers/setup.py +882 -0
  6. desktop_env/desktop_env.py +509 -0
  7. desktop_env/evaluators/__init__.py +5 -0
  8. desktop_env/evaluators/getters/__init__.py +41 -0
  9. desktop_env/evaluators/getters/calc.py +15 -0
  10. desktop_env/evaluators/getters/chrome.py +1774 -0
  11. desktop_env/evaluators/getters/file.py +154 -0
  12. desktop_env/evaluators/getters/general.py +42 -0
  13. desktop_env/evaluators/getters/gimp.py +38 -0
  14. desktop_env/evaluators/getters/impress.py +126 -0
  15. desktop_env/evaluators/getters/info.py +24 -0
  16. desktop_env/evaluators/getters/misc.py +406 -0
  17. desktop_env/evaluators/getters/replay.py +20 -0
  18. desktop_env/evaluators/getters/vlc.py +86 -0
  19. desktop_env/evaluators/getters/vscode.py +35 -0
  20. desktop_env/evaluators/metrics/__init__.py +160 -0
  21. desktop_env/evaluators/metrics/basic_os.py +68 -0
  22. desktop_env/evaluators/metrics/chrome.py +493 -0
  23. desktop_env/evaluators/metrics/docs.py +1011 -0
  24. desktop_env/evaluators/metrics/general.py +665 -0
  25. desktop_env/evaluators/metrics/gimp.py +637 -0
  26. desktop_env/evaluators/metrics/libreoffice.py +28 -0
  27. desktop_env/evaluators/metrics/others.py +92 -0
  28. desktop_env/evaluators/metrics/pdf.py +31 -0
  29. desktop_env/evaluators/metrics/slides.py +957 -0
  30. desktop_env/evaluators/metrics/table.py +585 -0
  31. desktop_env/evaluators/metrics/thunderbird.py +176 -0
  32. desktop_env/evaluators/metrics/utils.py +719 -0
  33. desktop_env/evaluators/metrics/vlc.py +524 -0
  34. desktop_env/evaluators/metrics/vscode.py +283 -0
  35. desktop_env/providers/__init__.py +35 -0
  36. desktop_env/providers/aws/__init__.py +0 -0
  37. desktop_env/providers/aws/manager.py +278 -0
  38. desktop_env/providers/aws/provider.py +186 -0
  39. desktop_env/providers/aws/provider_with_proxy.py +315 -0
  40. desktop_env/providers/aws/proxy_pool.py +193 -0
  41. desktop_env/providers/azure/__init__.py +0 -0
  42. desktop_env/providers/azure/manager.py +87 -0
  43. desktop_env/providers/azure/provider.py +207 -0
  44. desktop_env/providers/base.py +97 -0
  45. desktop_env/providers/gcp/__init__.py +0 -0
  46. desktop_env/providers/gcp/manager.py +0 -0
  47. desktop_env/providers/gcp/provider.py +0 -0
  48. desktop_env/providers/virtualbox/__init__.py +0 -0
  49. desktop_env/providers/virtualbox/manager.py +463 -0
  50. desktop_env/providers/virtualbox/provider.py +124 -0
  51. desktop_env/providers/vmware/__init__.py +0 -0
  52. desktop_env/providers/vmware/manager.py +455 -0
  53. desktop_env/providers/vmware/provider.py +105 -0
  54. gui_agents/__init__.py +0 -0
  55. gui_agents/agents/Action.py +209 -0
  56. gui_agents/agents/__init__.py +0 -0
  57. gui_agents/agents/agent_s.py +832 -0
  58. gui_agents/agents/global_state.py +610 -0
  59. gui_agents/agents/grounding.py +651 -0
  60. gui_agents/agents/hardware_interface.py +129 -0
  61. gui_agents/agents/manager.py +568 -0
  62. gui_agents/agents/translator.py +132 -0
  63. gui_agents/agents/worker.py +355 -0
  64. gui_agents/cli_app.py +560 -0
  65. gui_agents/core/__init__.py +0 -0
  66. gui_agents/core/engine.py +1496 -0
  67. gui_agents/core/knowledge.py +449 -0
  68. gui_agents/core/mllm.py +555 -0
  69. gui_agents/tools/__init__.py +0 -0
  70. gui_agents/tools/tools.py +727 -0
  71. gui_agents/unit_test/__init__.py +0 -0
  72. gui_agents/unit_test/run_tests.py +65 -0
  73. gui_agents/unit_test/test_manager.py +330 -0
  74. gui_agents/unit_test/test_worker.py +269 -0
  75. gui_agents/utils/__init__.py +0 -0
  76. gui_agents/utils/analyze_display.py +301 -0
  77. gui_agents/utils/common_utils.py +263 -0
  78. gui_agents/utils/display_viewer.py +281 -0
  79. gui_agents/utils/embedding_manager.py +53 -0
  80. gui_agents/utils/image_axis_utils.py +27 -0
  81. lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
  82. lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
  83. lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
  84. lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1 @@
1
+
desktop_env/actions.py ADDED
@@ -0,0 +1,203 @@
1
# Upper bounds used for the "range" of x / y coordinate parameters below.
X_MAX = 1920  # TODO: get the screen resolution
Y_MAX = 1080

# Key names allowed as the "range" of the keyboard actions below.
# The single-character keys are tab/newline/carriage-return followed by every
# printable ASCII character except the uppercase letters; named keys follow.
KEYBOARD_KEYS = (
    ['\t', '\n', '\r']
    + [chr(code) for code in range(0x20, 0x7F) if not chr(code).isupper()]
    + [
        'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace',
        'browserback', 'browserfavorites', 'browserforward', 'browserhome',
        'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear',
        'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete',
        'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute',
        'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18',
        'f19', 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6',
        'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help',
        'home', 'insert', 'junja', 'kana', 'kanji', 'launchapp1', 'launchapp2',
        'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply',
        'nexttrack', 'nonconvert',
        'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8',
        'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup',
        'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn', 'prtsc',
        'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator',
        'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract', 'tab',
        'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft',
        'winright', 'yen', 'command', 'option', 'optionleft', 'optionright',
    ]
)


def _coordinate(upper, optional):
    """Build the spec of a float coordinate bounded by [0, upper]."""
    return {"type": float, "range": [0, upper], "optional": optional}


def _mouse_button():
    """Build the spec of the optional mouse-button parameter."""
    return {"type": str, "range": ["left", "right", "middle"], "optional": True}


def _unbounded(value_type):
    """Build the spec of a required parameter that has no value range."""
    return {"type": value_type, "range": None, "optional": False}


def _keyboard_key():
    """Build the spec of a required single-key parameter."""
    return {"type": str, "range": KEYBOARD_KEYS, "optional": False}


# Every action the environment understands. Each entry carries its
# "action_type", a human-readable "note" and, for actions that take
# arguments, a "parameters" mapping of name -> {type, range, optional}.
ACTION_SPACE = [
    {
        "action_type": "MOVE_TO",
        "note": "move the cursor to the specified position",
        "parameters": {
            "x": _coordinate(X_MAX, False),
            "y": _coordinate(Y_MAX, False),
        },
    },
    {
        "action_type": "CLICK",
        "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
        "parameters": {
            "button": _mouse_button(),
            "x": _coordinate(X_MAX, True),
            "y": _coordinate(Y_MAX, True),
            "num_clicks": {"type": int, "range": [1, 2, 3], "optional": True},
        },
    },
    {
        "action_type": "MOUSE_DOWN",
        "note": "press the left button if the button not specified, otherwise press the specified button",
        "parameters": {"button": _mouse_button()},
    },
    {
        "action_type": "MOUSE_UP",
        "note": "release the left button if the button not specified, otherwise release the specified button",
        "parameters": {"button": _mouse_button()},
    },
    {
        "action_type": "RIGHT_CLICK",
        "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
        "parameters": {
            "x": _coordinate(X_MAX, True),
            "y": _coordinate(Y_MAX, True),
        },
    },
    {
        "action_type": "DOUBLE_CLICK",
        "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
        "parameters": {
            "x": _coordinate(X_MAX, True),
            "y": _coordinate(Y_MAX, True),
        },
    },
    {
        "action_type": "DRAG_TO",
        "note": "drag the cursor to the specified position with the left button pressed",
        "parameters": {
            "x": _coordinate(X_MAX, False),
            "y": _coordinate(Y_MAX, False),
        },
    },
    {
        "action_type": "SCROLL",
        "note": "scroll the mouse wheel up or down",
        "parameters": {
            "dx": _unbounded(int),
            "dy": _unbounded(int),
        },
    },
    {
        "action_type": "TYPING",
        "note": "type the specified text",
        "parameters": {"text": _unbounded(str)},
    },
    {
        "action_type": "PRESS",
        "note": "press the specified key and release it",
        "parameters": {"key": _keyboard_key()},
    },
    {
        "action_type": "KEY_DOWN",
        "note": "press the specified key",
        "parameters": {"key": _keyboard_key()},
    },
    {
        "action_type": "KEY_UP",
        "note": "release the specified key",
        "parameters": {"key": _keyboard_key()},
    },
    {
        "action_type": "HOTKEY",
        "note": "press the specified key combination",
        "parameters": {
            # The range is a one-element list wrapping the full key list.
            "keys": {"type": list, "range": [KEYBOARD_KEYS], "optional": False},
        },
    },
    # Control actions: they take no parameters.
    {"action_type": "WAIT", "note": "wait until the next action"},
    {"action_type": "FAIL", "note": "decide the task can not be performed"},
    {"action_type": "DONE", "note": "decide the task is done"},
]
File without changes
@@ -0,0 +1,471 @@
1
+ import json
2
+ import logging
3
+ import random
4
+ from typing import Any, Dict, Optional
5
+ import time
6
+ import requests
7
+
8
+ from desktop_env.actions import KEYBOARD_KEYS
9
+
10
+ logger = logging.getLogger("desktopenv.pycontroller")
11
+
12
+
13
class PythonController:
    """Client for the HTTP control server reachable at ``http://{vm_ip}:{server_port}``.

    Every getter retries ``retry_times`` times, sleeping ``retry_interval``
    seconds between attempts, and returns ``None`` once all attempts failed.

    NOTE(review): apart from ``/execute`` (90 s), requests are issued without
    a timeout, so an unresponsive server can block the caller indefinitely.
    """

    # pyautogui function names for action types that share a parameter shape.
    _BUTTON_ACTIONS = {"MOUSE_DOWN": "mouseDown", "MOUSE_UP": "mouseUp"}
    _POINT_ACTIONS = {"RIGHT_CLICK": "rightClick", "DOUBLE_CLICK": "doubleClick"}
    _KEY_ACTIONS = {"PRESS": "press", "KEY_DOWN": "keyDown", "KEY_UP": "keyUp"}

    def __init__(self, vm_ip: str,
                 server_port: int,
                 pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}"):
        """
        Args:
            vm_ip: address of the machine running the control server.
            server_port: port of the control server.
            pkgs_prefix: template wrapped around every python command; it must
                contain a ``{command}`` placeholder.
        """
        self.vm_ip = vm_ip
        self.http_server = f"http://{vm_ip}:{server_port}"
        self.pkgs_prefix = pkgs_prefix  # fixme: this is a hacky way to execute python commands. fix it and combine it with installation of packages
        self.retry_times = 3
        self.retry_interval = 5

    def _request_with_retry(self, send, on_success, success_msg: str, action: str, error_action: str):
        """Run ``send()`` until it yields HTTP 200, then return ``on_success(response)``.

        Args:
            send: zero-argument callable performing the HTTP request.
            on_success: callable turning the successful response into the result.
                It runs inside the ``try`` so a failure in it also triggers a retry.
            success_msg: message logged when the server answers with 200.
            action: wording used in the "Failed to ..." / "Retrying to ..." logs.
            error_action: wording used in the "An error occurred ..." log.

        Returns:
            The value of ``on_success(response)``, or ``None`` if every attempt failed.
        """
        for _ in range(self.retry_times):
            try:
                response = send()
                if response.status_code == 200:
                    logger.info(success_msg)
                    return on_success(response)
                logger.error(f"Failed to {action}. Status code: %d", response.status_code)
                logger.info(f"Retrying to {action}.")
            except Exception as e:
                logger.error(f"An error occurred while trying to {error_action}: %s", e)
                logger.info(f"Retrying to {action}.")
            time.sleep(self.retry_interval)

        logger.error(f"Failed to {action}.")
        return None

    def get_screenshot(self) -> Optional[bytes]:
        """
        Gets a screenshot from the server. With the cursor. None -> no screenshot or unexpected error.
        """
        return self._request_with_retry(
            lambda: requests.get(self.http_server + "/screenshot"),
            lambda response: response.content,
            "Got screenshot successfully", "get screenshot", "get the screenshot")

    def get_accessibility_tree(self) -> Optional[str]:
        """
        Gets the accessibility tree from the server. None -> no accessibility tree or unexpected error.
        """
        return self._request_with_retry(
            lambda: requests.get(self.http_server + "/accessibility"),
            lambda response: response.json()["AT"],
            "Got accessibility tree successfully", "get accessibility tree", "get the accessibility tree")

    def get_terminal_output(self) -> Optional[str]:
        """
        Gets the terminal output from the server. None -> no terminal output or unexpected error.
        """
        return self._request_with_retry(
            lambda: requests.get(self.http_server + "/terminal"),
            lambda response: response.json()["output"],
            "Got terminal output successfully", "get terminal output", "get the terminal output")

    def get_file(self, file_path: str) -> Optional[bytes]:
        """
        Gets the content of the file at ``file_path`` from the server. None -> failure.
        """
        return self._request_with_retry(
            lambda: requests.post(self.http_server + "/file", data={"file_path": file_path}),
            lambda response: response.content,
            "File downloaded successfully", "get file", "get the file")

    def execute_python_command(self, command: str) -> Optional[Dict[str, Any]]:
        """
        Executes a python command on the server.
        It can be used to execute the pyautogui commands, or... any other python command. who knows?

        The command is substituted into ``pkgs_prefix`` and run as ``python -c``.

        Returns:
            The decoded JSON body of the server's answer, or ``None`` when the
            request timed out or every attempt failed.
        """
        command_list = ["python", "-c", self.pkgs_prefix.format(command=command)]
        payload = json.dumps({"command": command_list, "shell": False})

        for _ in range(self.retry_times):
            try:
                response = requests.post(self.http_server + "/execute", headers={'Content-Type': 'application/json'},
                                         data=payload, timeout=90)
                if response.status_code == 200:
                    logger.info("Command executed successfully: %s", response.text)
                    return response.json()
                logger.error("Failed to execute command. Status code: %d", response.status_code)
                logger.info("Retrying to execute command.")
            except requests.exceptions.ReadTimeout:
                # A read timeout is not retried: give up immediately.
                break
            except Exception as e:
                logger.error("An error occurred while trying to execute the command: %s", e)
                logger.info("Retrying to execute command.")
            time.sleep(self.retry_interval)

        logger.error("Failed to execute command.")
        return None

    @staticmethod
    def _checked_key(key: str) -> str:
        """Return ``key`` unchanged, raising if its lowercase form is not in KEYBOARD_KEYS."""
        if key.lower() not in KEYBOARD_KEYS:
            raise Exception(f"Key must be one of {KEYBOARD_KEYS}")
        return key

    def execute_action(self, action: Dict[str, Any]):
        """
        Executes an action on the server computer.

        Args:
            action: either one of the bare strings 'WAIT' / 'FAIL' / 'DONE', or a
                dict with an "action_type" key. Its arguments are read from the
                "parameters" key when present, otherwise from the remaining
                top-level keys.

        Raises:
            Exception: on an unknown action type, an unsupported parameter
                combination, or a key that is not in KEYBOARD_KEYS.
        """
        if action in ['WAIT', 'FAIL', 'DONE']:
            return

        action_type = action["action_type"]
        if "parameters" in action:
            parameters = action["parameters"]
        else:
            parameters = {name: value for name, value in action.items() if name != 'action_type'}
        if parameters is None:
            # An explicit None means "no arguments"; normalise it so the
            # membership tests below do not fail with a TypeError.
            parameters = {}

        move_mode = random.choice(
            ["pyautogui.easeInQuad", "pyautogui.easeOutQuad", "pyautogui.easeInOutQuad", "pyautogui.easeInBounce",
             "pyautogui.easeInElastic"])
        duration = random.uniform(0.5, 1)

        if action_type == "MOVE_TO":
            if not parameters:
                self.execute_python_command("pyautogui.moveTo()")
            elif "x" in parameters and "y" in parameters:
                x = parameters["x"]
                y = parameters["y"]
                self.execute_python_command(f"pyautogui.moveTo({x}, {y}, {duration}, {move_mode})")
            else:
                raise Exception(f"Unknown parameters: {parameters}")

        elif action_type == "CLICK":
            has_x = "x" in parameters
            has_y = "y" in parameters
            if has_x != has_y:
                # A position needs both coordinates.
                raise Exception(f"Unknown parameters: {parameters}")
            arguments = []
            if "button" in parameters:
                arguments.append("button={!r}".format(parameters["button"]))
            if has_x:
                arguments.append("x={}".format(parameters["x"]))
                arguments.append("y={}".format(parameters["y"]))
            if "num_clicks" in parameters:
                arguments.append("clicks={}".format(parameters["num_clicks"]))
            if parameters and not arguments:
                # Parameters were given but none of them is understood.
                raise Exception(f"Unknown parameters: {parameters}")
            joined = ", ".join(arguments)
            self.execute_python_command(f"pyautogui.click({joined})")

        elif action_type in self._BUTTON_ACTIONS:
            function = self._BUTTON_ACTIONS[action_type]
            if not parameters:
                self.execute_python_command(f"pyautogui.{function}()")
            elif "button" in parameters:
                button = parameters["button"]
                self.execute_python_command(f"pyautogui.{function}(button={button!r})")
            else:
                raise Exception(f"Unknown parameters: {parameters}")

        elif action_type in self._POINT_ACTIONS:
            function = self._POINT_ACTIONS[action_type]
            if not parameters:
                self.execute_python_command(f"pyautogui.{function}()")
            elif "x" in parameters and "y" in parameters:
                x = parameters["x"]
                y = parameters["y"]
                self.execute_python_command(f"pyautogui.{function}(x={x}, y={y})")
            else:
                raise Exception(f"Unknown parameters: {parameters}")

        elif action_type == "DRAG_TO":
            if "x" in parameters and "y" in parameters:
                x = parameters["x"]
                y = parameters["y"]
                self.execute_python_command(
                    f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")
            else:
                # Both coordinates are required; do not silently ignore the action.
                raise Exception(f"Unknown parameters: {parameters}")

        elif action_type == "SCROLL":
            # todo: check if it is related to the operating system, as https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out
            if "dx" not in parameters and "dy" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            if "dx" in parameters:
                dx = parameters["dx"]
                self.execute_python_command(f"pyautogui.hscroll({dx})")
            if "dy" in parameters:
                dy = parameters["dy"]
                self.execute_python_command(f"pyautogui.vscroll({dy})")

        elif action_type == "TYPING":
            if "text" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            # repr() takes care of quotes and backslashes in the text.
            text = parameters["text"]
            self.execute_python_command(f"pyautogui.typewrite({text!r})")

        elif action_type in self._KEY_ACTIONS:
            if "key" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            key = self._checked_key(parameters["key"])
            function = self._KEY_ACTIONS[action_type]
            # repr() keeps the generated code valid for keys such as "'" or "\\".
            self.execute_python_command(f"pyautogui.{function}({key!r})")

        elif action_type == "HOTKEY":
            if "keys" not in parameters:
                raise Exception(f"Unknown parameters: {parameters}")
            keys = parameters["keys"]
            if not isinstance(keys, list):
                raise Exception("Keys must be a list of keys")
            keys_para_rep = ", ".join(repr(self._checked_key(key)) for key in keys)
            self.execute_python_command(f"pyautogui.hotkey({keys_para_rep})")

        elif action_type in ['WAIT', 'FAIL', 'DONE']:
            pass

        else:
            raise Exception(f"Unknown action type: {action_type}")

    # Record video
    def start_recording(self):
        """
        Starts recording the screen.
        """
        self._request_with_retry(
            lambda: requests.post(self.http_server + "/start_recording"),
            lambda response: None,
            "Recording started successfully", "start recording", "start recording")

    def end_recording(self, dest: str):
        """
        Ends recording the screen and writes the returned recording to ``dest``.
        """

        def save(response):
            with open(dest, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        self._request_with_retry(
            lambda: requests.post(self.http_server + "/end_recording"),
            save,
            "Recording stopped successfully", "stop recording", "stop recording")

    # Additional info
    def get_vm_platform(self) -> Optional[str]:
        """
        Gets the platform name of the vm, i.e. the stripped output of
        ``platform.system()`` executed on it. None -> the command could not be executed.
        """
        result = self.execute_python_command("import platform; print(platform.system())")
        if result is None:
            return None
        return result['output'].strip()

    def get_vm_screen_size(self):
        """
        Gets the size of the vm screen (decoded JSON answer). None -> failure.
        """
        return self._request_with_retry(
            lambda: requests.post(self.http_server + "/screen_size"),
            lambda response: response.json(),
            "Got screen size successfully", "get screen size", "get the screen size")

    def get_vm_window_size(self, app_class_name: str):
        """
        Gets the size of the vm app window (decoded JSON answer). None -> failure.
        """
        return self._request_with_retry(
            lambda: requests.post(self.http_server + "/window_size", data={"app_class_name": app_class_name}),
            lambda response: response.json(),
            "Got window size successfully", "get window size", "get the window size")

    def get_vm_wallpaper(self):
        """
        Gets the wallpaper of the vm as raw bytes. None -> failure.
        """
        return self._request_with_retry(
            lambda: requests.post(self.http_server + "/wallpaper"),
            lambda response: response.content,
            "Got wallpaper successfully", "get wallpaper", "get the wallpaper")

    def get_vm_desktop_path(self) -> Optional[str]:
        """
        Gets the desktop path of the vm. None -> failure.
        """
        return self._request_with_retry(
            lambda: requests.post(self.http_server + "/desktop_path"),
            lambda response: response.json()["desktop_path"],
            "Got desktop path successfully", "get desktop path", "get the desktop path")

    def get_vm_directory_tree(self, path) -> Optional[Dict[str, Any]]:
        """
        Gets the directory tree of the vm rooted at ``path``. None -> failure.
        """
        payload = json.dumps({"path": path})
        return self._request_with_retry(
            lambda: requests.post(self.http_server + "/list_directory",
                                  headers={'Content-Type': 'application/json'}, data=payload),
            lambda response: response.json()["directory_tree"],
            "Got directory tree successfully", "get directory tree", "get directory tree")