lybic-guiagents 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- desktop_env/__init__.py +1 -0
- desktop_env/actions.py +203 -0
- desktop_env/controllers/__init__.py +0 -0
- desktop_env/controllers/python.py +471 -0
- desktop_env/controllers/setup.py +882 -0
- desktop_env/desktop_env.py +509 -0
- desktop_env/evaluators/__init__.py +5 -0
- desktop_env/evaluators/getters/__init__.py +41 -0
- desktop_env/evaluators/getters/calc.py +15 -0
- desktop_env/evaluators/getters/chrome.py +1774 -0
- desktop_env/evaluators/getters/file.py +154 -0
- desktop_env/evaluators/getters/general.py +42 -0
- desktop_env/evaluators/getters/gimp.py +38 -0
- desktop_env/evaluators/getters/impress.py +126 -0
- desktop_env/evaluators/getters/info.py +24 -0
- desktop_env/evaluators/getters/misc.py +406 -0
- desktop_env/evaluators/getters/replay.py +20 -0
- desktop_env/evaluators/getters/vlc.py +86 -0
- desktop_env/evaluators/getters/vscode.py +35 -0
- desktop_env/evaluators/metrics/__init__.py +160 -0
- desktop_env/evaluators/metrics/basic_os.py +68 -0
- desktop_env/evaluators/metrics/chrome.py +493 -0
- desktop_env/evaluators/metrics/docs.py +1011 -0
- desktop_env/evaluators/metrics/general.py +665 -0
- desktop_env/evaluators/metrics/gimp.py +637 -0
- desktop_env/evaluators/metrics/libreoffice.py +28 -0
- desktop_env/evaluators/metrics/others.py +92 -0
- desktop_env/evaluators/metrics/pdf.py +31 -0
- desktop_env/evaluators/metrics/slides.py +957 -0
- desktop_env/evaluators/metrics/table.py +585 -0
- desktop_env/evaluators/metrics/thunderbird.py +176 -0
- desktop_env/evaluators/metrics/utils.py +719 -0
- desktop_env/evaluators/metrics/vlc.py +524 -0
- desktop_env/evaluators/metrics/vscode.py +283 -0
- desktop_env/providers/__init__.py +35 -0
- desktop_env/providers/aws/__init__.py +0 -0
- desktop_env/providers/aws/manager.py +278 -0
- desktop_env/providers/aws/provider.py +186 -0
- desktop_env/providers/aws/provider_with_proxy.py +315 -0
- desktop_env/providers/aws/proxy_pool.py +193 -0
- desktop_env/providers/azure/__init__.py +0 -0
- desktop_env/providers/azure/manager.py +87 -0
- desktop_env/providers/azure/provider.py +207 -0
- desktop_env/providers/base.py +97 -0
- desktop_env/providers/gcp/__init__.py +0 -0
- desktop_env/providers/gcp/manager.py +0 -0
- desktop_env/providers/gcp/provider.py +0 -0
- desktop_env/providers/virtualbox/__init__.py +0 -0
- desktop_env/providers/virtualbox/manager.py +463 -0
- desktop_env/providers/virtualbox/provider.py +124 -0
- desktop_env/providers/vmware/__init__.py +0 -0
- desktop_env/providers/vmware/manager.py +455 -0
- desktop_env/providers/vmware/provider.py +105 -0
- gui_agents/__init__.py +0 -0
- gui_agents/agents/Action.py +209 -0
- gui_agents/agents/__init__.py +0 -0
- gui_agents/agents/agent_s.py +832 -0
- gui_agents/agents/global_state.py +610 -0
- gui_agents/agents/grounding.py +651 -0
- gui_agents/agents/hardware_interface.py +129 -0
- gui_agents/agents/manager.py +568 -0
- gui_agents/agents/translator.py +132 -0
- gui_agents/agents/worker.py +355 -0
- gui_agents/cli_app.py +560 -0
- gui_agents/core/__init__.py +0 -0
- gui_agents/core/engine.py +1496 -0
- gui_agents/core/knowledge.py +449 -0
- gui_agents/core/mllm.py +555 -0
- gui_agents/tools/__init__.py +0 -0
- gui_agents/tools/tools.py +727 -0
- gui_agents/unit_test/__init__.py +0 -0
- gui_agents/unit_test/run_tests.py +65 -0
- gui_agents/unit_test/test_manager.py +330 -0
- gui_agents/unit_test/test_worker.py +269 -0
- gui_agents/utils/__init__.py +0 -0
- gui_agents/utils/analyze_display.py +301 -0
- gui_agents/utils/common_utils.py +263 -0
- gui_agents/utils/display_viewer.py +281 -0
- gui_agents/utils/embedding_manager.py +53 -0
- gui_agents/utils/image_axis_utils.py +27 -0
- lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
- lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
- lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
- lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
- lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
gui_agents/cli_app.py
ADDED
|
@@ -0,0 +1,560 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import datetime
|
|
3
|
+
import io
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import platform
|
|
7
|
+
import pyautogui
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
|
|
14
|
+
|
|
15
|
+
# Load environment variables from a ``.env`` file. The file next to this
# module takes precedence; only when it is absent is the ``.env`` in the
# parent directory tried. If neither exists nothing is loaded.
_module_dir = Path(os.path.abspath(__file__)).parent
env_path = _module_dir / '.env'
if not env_path.exists():
    parent_env_path = _module_dir.parent / '.env'
    if parent_env_path.exists():
        load_dotenv(dotenv_path=parent_env_path)
else:
    load_dotenv(dotenv_path=env_path)
|
|
22
|
+
|
|
23
|
+
from PIL import Image
|
|
24
|
+
|
|
25
|
+
# from gui_agents.agents.grounding import OSWorldACI
|
|
26
|
+
from gui_agents.agents.Action import Screenshot
|
|
27
|
+
from gui_agents.agents.agent_s import AgentS2, AgentSFast
|
|
28
|
+
|
|
29
|
+
from gui_agents.store.registry import Registry
|
|
30
|
+
from gui_agents.agents.global_state import GlobalState
|
|
31
|
+
from gui_agents.agents.hardware_interface import HardwareInterface
|
|
32
|
+
|
|
33
|
+
# Import analyze_display functionality
|
|
34
|
+
from gui_agents.utils.analyze_display import analyze_display_json, aggregate_results, format_output_line
|
|
35
|
+
|
|
36
|
+
# Lower-cased host OS name. main() overrides this to 'windows' when the
# lybic backend is selected.
current_platform = platform.system().lower()

# The root logger is configured at DEBUG so each handler can apply its own
# threshold.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Timestamp identifying this run; every run logs into runtime/<timestamp>/.
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

log_dir = "runtime"
_run_log_dir = os.path.join(log_dir, datetime_str)
os.makedirs(_run_log_dir, exist_ok=True)

# Handlers are created in the same order as they are attached below.
file_handler = logging.FileHandler(
    os.path.join(_run_log_dir, "normal.log"), encoding="utf-8")
debug_handler = logging.FileHandler(
    os.path.join(_run_log_dir, "debug.log"), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(
    os.path.join(_run_log_dir, "sdebug.log"), encoding="utf-8")

# ANSI-coloured record layout shared by every handler.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
)

for _handler, _level in (
    (file_handler, logging.INFO),
    (debug_handler, logging.DEBUG),
    (stdout_handler, logging.INFO),
    (sdebug_handler, logging.DEBUG),
):
    _handler.setLevel(_level)
    _handler.setFormatter(formatter)

# stdout and sdebug.log only receive records from the "desktopenv" logger
# hierarchy; the other two handlers receive everything.
for _handler in (stdout_handler, sdebug_handler):
    _handler.addFilter(logging.Filter("desktopenv"))

for _handler in (file_handler, debug_handler, stdout_handler, sdebug_handler):
    logger.addHandler(_handler)

# Host OS name in its original casing (passed to the hardware backend).
platform_os = platform.system()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def auto_analyze_execution(timestamp_dir: str):
    """
    Log and print execution statistics derived from a run's display.json.

    The function waits (up to ``max_wait_time`` seconds, polling every
    ``wait_interval`` seconds) for ``<timestamp_dir>/display.json`` to exist
    and for its size to stop changing between two consecutive checks. If the
    file exists afterwards it is handed to ``analyze_display_json`` and the
    formatted result is written to the log and to stdout. Any exception is
    caught and logged; nothing is returned.

    Args:
        timestamp_dir: Directory containing the execution logs and display.json
    """
    import time

    max_wait_time = 10  # Maximum wait time in seconds
    wait_interval = 0.5  # Poll period in seconds
    separator = "=" * 80
    title = "EXECUTION STATISTICS:"
    columns = "Steps, Duration (seconds), (Input Tokens, Output Tokens, Total Tokens), Cost"

    try:
        display_json_path = os.path.join(timestamp_dir, "display.json")

        # Phase 1: wait for the file to appear and stop growing.
        waited_time = 0
        while waited_time < max_wait_time:
            if not os.path.exists(display_json_path):
                logger.info(f"Waiting for display.json file to be created... ({waited_time:.1f}s)")
                time.sleep(wait_interval)
                waited_time += wait_interval
                continue

            try:
                # Two size samples one interval apart: equal sizes are taken
                # as "writer has finished".
                size_before = os.path.getsize(display_json_path)
                time.sleep(wait_interval)
                size_after = os.path.getsize(display_json_path)

                if size_before == size_after:
                    logger.info(f"Display.json file appears to be complete (size: {size_before} bytes)")
                    break
                logger.info(f"Display.json file still being written (size changed from {size_before} to {size_after} bytes)")
            except OSError:
                # File might be temporarily inaccessible
                time.sleep(wait_interval)
            waited_time += wait_interval

        # Phase 2: analyze whatever is on disk now (even if the wait timed out).
        if not os.path.exists(display_json_path):
            logger.warning(f"Display.json file not found at: {display_json_path} after waiting {max_wait_time} seconds")
            return

        logger.info(f"Auto-analyzing execution statistics from: {display_json_path}")
        result = analyze_display_json(display_json_path)
        if not result:
            logger.warning("No valid data found in display.json for analysis")
            return

        output_line = format_output_line(result)
        for line in (separator, title, columns, separator, output_line, separator):
            logger.info(line)

        # Also print to console for immediate visibility
        print("\n" + separator)
        for line in (title, columns, separator, output_line, separator):
            print(line)

    except Exception as e:
        logger.error(f"Error during auto-analysis: {e}")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def show_permission_dialog(code: str, action_description: str):
    """Show a platform-specific permission dialog and return True if approved.

    macOS uses ``osascript`` (``display dialog``) and Linux uses ``zenity``.
    The dialog tool is started directly with an argument list rather than
    through a shell, so shell metacharacters in ``code`` or
    ``action_description`` (both may originate from model output) cannot be
    interpreted as commands.

    Args:
        code: The action/code about to be executed; shown to the user.
        action_description: Human-readable description of the action. Only
            the macOS dialog displays it.

    Returns:
        True when the dialog process exits with status 0 (user approved).
        False when the user declines, the dialog tool cannot be started, or
        the platform is neither macOS nor Linux.
    """
    import subprocess

    system = platform.system()
    if system == "Darwin":
        prompt = (f"Do you want to execute this action?\n\n{code} "
                  f"which will try to {action_description}")
        # Backslash and double quote are the escape-worthy characters inside
        # an AppleScript string literal; escape them so the text cannot
        # terminate the literal and inject script.
        escaped = prompt.replace("\\", "\\\\").replace('"', '\\"')
        command = [
            "osascript",
            "-e",
            f'display dialog "{escaped}" with title "Action Permission" '
            'buttons {"Cancel", "OK"} default button "OK" cancel button "Cancel"',
        ]
    elif system == "Linux":
        command = [
            "zenity",
            "--question",
            "--title=Action Permission",
            f"--text=Do you want to execute this action?\n\n{code}",
            "--width=400",
            "--height=200",
        ]
    else:
        return False

    try:
        return subprocess.run(command, check=False).returncode == 0
    except OSError:
        # Dialog tool missing or not executable: treat as "not approved",
        # matching the non-zero status os.system() reported in that case.
        return False
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def scale_screenshot_dimensions(screenshot: Image.Image, hwi_para: HardwareInterface):
    """Resize a screenshot to the size reported by pyautogui, when applicable.

    Only screenshots taken through a ``PyAutoGUIBackend`` are touched: if the
    image size differs from ``pyautogui.size()`` it is resized to that size
    with LANCZOS resampling. In every other case the screenshot is returned
    unchanged.

    Args:
        screenshot: The captured image.
        hwi_para: Hardware interface whose ``backend`` decides whether
            scaling applies.

    Returns:
        The original image, or a resized copy.
    """
    shot_height = screenshot.height
    shot_width = screenshot.width

    if not isinstance(hwi_para.backend, PyAutoGUIBackend):
        return screenshot

    screen_width, screen_height = pyautogui.size()
    if (screen_width, screen_height) == (shot_width, shot_height):
        return screenshot

    return screenshot.resize((screen_width, screen_height), Image.Resampling.LANCZOS)
|
|
181
|
+
|
|
182
|
+
def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_steps: int = 50, enable_takeover: bool = False):
    """Run the normal-mode agent loop for a single instruction.

    Each step takes a screenshot through the hardware interface, stores it in
    the global state, asks ``agent.predict`` for the next action and then
    acts on the type of the first returned action:

    * ``done`` / ``fail``: show a result dialog, update narrative memory, stop.
    * ``next``: go straight to the next step.
    * ``wait``: sleep 5 seconds, then go to the next step.
    * ``usertakeover``: if takeover is enabled, mark the run as "stopped",
      show the request and pause at the top of the next step until the user
      types ``continue``; otherwise log and go to the next step.
    * anything else: dispatch the action to the hardware interface and append
      the step's reflection and plan to the trajectory.

    When the loop ends, total duration is logged and
    ``auto_analyze_execution`` is run on this run's log directory.

    Args:
        agent: Object providing ``predict``, ``update_narrative_memory`` and
            ``update_episodic_memory``.
        instruction: The user task.
        hwi_para: Hardware interface used for screenshots and actions.
        max_steps: Upper bound on loop iterations.
        enable_takeover: Whether ``usertakeover`` actions pause the run.
    """
    import subprocess
    import time

    def _show_info_dialog(title: str, message: str, button: str, width: int, height: int) -> None:
        """Best-effort native info dialog (osascript on macOS, zenity on Linux).

        The tool is started with an argument list instead of a shell command
        line, so text coming from the agent cannot be interpreted by a shell.
        """
        def _applescript_literal(text: str) -> str:
            # Escape backslash and double quote so the text stays inside
            # the AppleScript string literal.
            return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"'

        system = platform.system()
        if system == "Darwin":
            command = [
                "osascript",
                "-e",
                f"display dialog {_applescript_literal(message)} "
                f"with title {_applescript_literal(title)} "
                f"buttons {_applescript_literal(button)} "
                f"default button {_applescript_literal(button)}",
            ]
        elif system == "Linux":
            command = [
                "zenity",
                "--info",
                f"--title={title}",
                f"--text={message}",
                f"--width={width}",
                f"--height={height}",
            ]
        else:
            return
        try:
            subprocess.run(command, check=False)
        except OSError as exc:
            # The dialog is informational only; a missing tool must not
            # abort the run.
            logger.debug(f"Could not show dialog: {exc}")

    traj = "Task:\n" + instruction
    subtask_traj = ""
    global_state: GlobalState = Registry.get("GlobalStateStore")  # type: ignore
    global_state.set_Tu(instruction)
    global_state.set_running_state("running")
    hwi = hwi_para

    total_start_time = time.time()
    for _ in range(max_steps):
        # Block here while a user takeover is in progress.
        while global_state.get_running_state() == "stopped":
            user_input = input(
                "Agent execution is paused. Enter 'continue' to resume: ")
            if user_input == "continue":
                global_state.set_running_state("running")
                logger.info("Agent execution resumed by user")
                break
            time.sleep(0.5)

        screenshot: Image.Image = hwi.dispatch(Screenshot())  # type: ignore
        global_state.set_screenshot(
            scale_screenshot_dimensions(screenshot, hwi_para))  # type: ignore
        obs = global_state.get_obs_for_manager()

        predict_start = time.time()
        info, code = agent.predict(instruction=instruction, observation=obs)
        predict_time = time.time() - predict_start
        logger.info(
            f"[Step Timing] agent.predict execution time: {predict_time:.2f} seconds"
        )

        global_state.log_operation(module="agent",
                                   operation="agent.predict",
                                   data={"duration": predict_time})

        action = code[0]
        action_type = action["type"].lower()

        if "done" in action_type or "fail" in action_type:
            # Report the real outcome instead of always claiming completion.
            outcome = "Task Completed" if "done" in action_type else "Task Failed"
            _show_info_dialog("OpenACI Agent", outcome, "OK", 200, 100)

            agent.update_narrative_memory(traj)
            break

        if "next" in action_type:
            continue

        if "wait" in action_type:
            time.sleep(5)
            continue

        if "usertakeover" in action_type:
            if not enable_takeover:
                logger.info(
                    "User takeover request received but takeover is disabled. Continuing execution."
                )
                continue

            message = action.get("message", "need user takeover")
            logger.info(f"User takeover request: {message}")

            # The pause loop at the top of the next iteration waits on this.
            global_state.set_running_state("stopped")

            _show_info_dialog("User takeover request", str(message), "Continue", 300, 150)

            logger.info("Agent execution paused waiting for user takeover")
            continue

        time.sleep(1.0)
        logger.info(f"EXECUTING CODE: {action}")

        step_dispatch_start = time.time()
        hwi.dispatchDict(action)
        step_dispatch_time = time.time() - step_dispatch_start
        logger.info(
            f"[Step Timing] hwi.dispatchDict execution time: {step_dispatch_time:.2f} seconds"
        )
        logger.info("HARDWARE INTERFACE: Executed")

        # Record executed code and time
        global_state.log_operation(module="hardware",
                                   operation="executing_code",
                                   data={"content": str(action)})
        global_state.log_operation(module="hardware",
                                   operation="hwi.dispatchDict",
                                   data={"duration": step_dispatch_time})

        time.sleep(1.0)

        # Update task and subtask trajectories and optionally the episodic memory.
        # Both fields go through str() so a non-string value cannot break the
        # concatenation.
        traj += ("\n\nReflection:\n" + str(info.get("reflection", "")) +
                 "\n\n----------------------\n\nPlan:\n" +
                 str(info.get("executor_plan", "")))
        subtask_traj = agent.update_episodic_memory(info, subtask_traj)

    total_end_time = time.time()
    total_duration = total_end_time - total_start_time
    logger.info(
        f"[Total Timing] Total execution time for this task: {total_duration:.2f} seconds"
    )
    global_state.log_operation(module="other",
                               operation="total_execution_time",
                               data={"duration": total_duration})

    # Auto-analyze execution statistics after task completion
    timestamp_dir = os.path.join(log_dir, datetime_str)
    auto_analyze_execution(timestamp_dir)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def run_agent_fast(agent,
                   instruction: str,
                   hwi_para: HardwareInterface,
                   max_steps: int = 50,
                   enable_takeover: bool = False):
    """Run the fast-mode agent loop for a single instruction.

    Each step takes a screenshot through the hardware interface, stores it in
    the global state, asks ``agent.predict`` for the next action and then
    acts on the type of the first returned action:

    * ``done`` / ``fail``: log the outcome, show a result dialog, stop.
    * ``wait``: sleep for the action's ``duration`` in milliseconds
      (default 5000), then go to the next step.
    * ``usertakeover``: if takeover is enabled, mark the run as "stopped",
      show the request and pause at the top of the next step until the user
      types ``continue``; otherwise log and go to the next step.
    * anything else: dispatch the action to the hardware interface.

    When the loop ends, total duration is logged and
    ``auto_analyze_execution`` is run on this run's log directory.

    Args:
        agent: Object providing ``predict``.
        instruction: The user task.
        hwi_para: Hardware interface used for screenshots and actions.
        max_steps: Upper bound on loop iterations.
        enable_takeover: Whether ``usertakeover`` actions pause the run.
    """
    import subprocess
    import time

    def _show_info_dialog(title: str, message: str, button: str, width: int, height: int) -> None:
        """Best-effort native info dialog (osascript on macOS, zenity on Linux).

        The tool is started with an argument list instead of a shell command
        line, so text coming from the agent cannot be interpreted by a shell.
        """
        def _applescript_literal(text: str) -> str:
            # Escape backslash and double quote so the text stays inside
            # the AppleScript string literal.
            return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"'

        system = platform.system()
        if system == "Darwin":
            command = [
                "osascript",
                "-e",
                f"display dialog {_applescript_literal(message)} "
                f"with title {_applescript_literal(title)} "
                f"buttons {_applescript_literal(button)} "
                f"default button {_applescript_literal(button)}",
            ]
        elif system == "Linux":
            command = [
                "zenity",
                "--info",
                f"--title={title}",
                f"--text={message}",
                f"--width={width}",
                f"--height={height}",
            ]
        else:
            return
        try:
            subprocess.run(command, check=False)
        except OSError as exc:
            # The dialog is informational only; a missing tool must not
            # abort the run.
            logger.debug(f"[Fast Mode] Could not show dialog: {exc}")

    global_state: GlobalState = Registry.get("GlobalStateStore")  # type: ignore
    global_state.set_Tu(instruction)
    global_state.set_running_state("running")
    hwi = hwi_para

    total_start_time = time.time()
    for step in range(max_steps):
        step_number = step + 1

        # Block here while a user takeover is in progress.
        while global_state.get_running_state() == "stopped":
            user_input = input(
                "Agent execution is paused. Enter 'continue' to resume: ")
            if user_input == "continue":
                global_state.set_running_state("running")
                logger.info("[Fast Mode] Agent execution resumed by user")
                break
            time.sleep(0.5)

        screenshot: Image.Image = hwi.dispatch(Screenshot())  # type: ignore
        global_state.set_screenshot(
            scale_screenshot_dimensions(screenshot, hwi_para))  # type: ignore
        obs = global_state.get_obs_for_manager()

        predict_start = time.time()
        info, code = agent.predict(instruction=instruction,
                                   observation=obs)
        predict_time = time.time() - predict_start
        logger.info(
            f"[Fast Mode] [Step {step_number}] Prediction time: {predict_time:.2f} seconds"
        )

        global_state.log_operation(module="agent_fast",
                                   operation="agent.predict_fast",
                                   data={
                                       "duration": predict_time,
                                       "step": step_number
                                   })

        action = code[0]
        action_type = action["type"].lower()

        if "done" in action_type or "fail" in action_type:
            succeeded = "done" in action_type
            logger.info(
                f"[Fast Mode] Task {'completed' if succeeded else 'failed'}"
            )
            # Keep the dialog consistent with the log line above instead of
            # always claiming completion.
            _show_info_dialog("OpenACI Agent (Fast)",
                              "Task Completed" if succeeded else "Task Failed",
                              "OK", 200, 100)
            break

        if "wait" in action_type:
            # ``duration`` is divided by 1000 before sleeping (ms -> s).
            wait_duration = action.get("duration", 5000) / 1000
            logger.info(f"[Fast Mode] Waiting for {wait_duration} seconds")
            time.sleep(wait_duration)
            continue

        if "usertakeover" in action_type:
            if not enable_takeover:
                logger.info(
                    "[Fast Mode] User takeover request received but takeover is disabled. Continuing execution."
                )
                continue

            message = action.get("message", "need user takeover")
            logger.info(f"[Fast Mode] User takeover request: {message}")

            # The pause loop at the top of the next iteration waits on this.
            global_state.set_running_state("stopped")

            _show_info_dialog("User takeover request (Fast)", str(message),
                              "Continue", 300, 150)

            logger.info(
                "[Fast Mode] Agent execution paused waiting for user takeover")
            continue

        logger.info(f"[Fast Mode] Executing action: {action}")
        step_dispatch_start = time.time()
        hwi.dispatchDict(action)
        step_dispatch_time = time.time() - step_dispatch_start
        logger.info(
            f"[Fast Mode] Action execution time: {step_dispatch_time:.2f} seconds"
        )

        global_state.log_operation(module="hardware_fast",
                                   operation="executing_code_fast",
                                   data={
                                       "content": str(action),
                                       "duration": step_dispatch_time,
                                       "step": step_number
                                   })

        time.sleep(0.5)

    total_end_time = time.time()
    total_duration = total_end_time - total_start_time
    logger.info(
        f"[Fast Mode] Total execution time: {total_duration:.2f} seconds")
    global_state.log_operation(module="other",
                               operation="total_execution_time_fast",
                               data={"duration": total_duration})

    # Auto-analyze execution statistics after task completion
    timestamp_dir = os.path.join(log_dir, datetime_str)
    auto_analyze_execution(timestamp_dir)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def main():
    """Command-line entry point.

    Parses the CLI options, creates this run's cache/state directories,
    registers a ``GlobalState`` under the key "GlobalStateStore", builds the
    agent for the chosen mode and a ``HardwareInterface`` for the chosen
    backend, then either runs the single ``--query`` or prompts for queries
    interactively until the user answers anything other than "y".
    """
    global current_platform

    parser = argparse.ArgumentParser(description='GUI Agent CLI Application')
    parser.add_argument('--backend', type=str, default='lybic',
                        help='Backend to use (e.g., lybic, pyautogui, pyautogui_vmware)')
    parser.add_argument('--query', type=str, default='',
                        help='Initial query to execute')
    parser.add_argument('--max-steps', type=int, default=50,
                        help='Maximum number of steps to execute (default: 50)')
    parser.add_argument('--mode', type=str, default='normal',
                        choices=['normal', 'fast'],
                        help='Agent mode: normal or fast (default: normal)')
    parser.add_argument('--enable-takeover', action='store_true',
                        help='Enable user takeover functionality')
    parser.add_argument('--disable-search', action='store_true',
                        help='Disable web search functionality (default: enabled)')
    parser.add_argument('--lybic-sid', type=str, default=None,
                        help='Lybic precreated sandbox ID (if not provided, will use LYBIC_PRECREATE_SID environment variable)')
    args = parser.parse_args()

    # Ensure necessary directory structure exists
    timestamp_dir = os.path.join(log_dir, datetime_str)
    cache_dir = os.path.join(timestamp_dir, "cache", "screens")
    state_dir = os.path.join(timestamp_dir, "state")
    for directory in (cache_dir, state_dir):
        os.makedirs(directory, exist_ok=True)

    def state_file(filename):
        """Path of a JSON state file inside this run's state directory."""
        return os.path.join(state_dir, filename)

    global_state = GlobalState(
        screenshot_dir=cache_dir,
        tu_path=state_file("tu.json"),
        search_query_path=state_file("search_query.json"),
        completed_subtasks_path=state_file("completed_subtasks.json"),
        failed_subtasks_path=state_file("failed_subtasks.json"),
        remaining_subtasks_path=state_file("remaining_subtasks.json"),
        termination_flag_path=state_file("termination_flag.json"),
        running_state_path=state_file("running_state.json"),
        display_info_path=os.path.join(timestamp_dir, "display.json"),
        agent_log_path=os.path.join(timestamp_dir, "agent_log.json"),
    )
    Registry.register("GlobalStateStore", global_state)

    # Set platform to Windows if backend is lybic
    if args.backend == 'lybic':
        current_platform = 'windows'

    # Initialize agent based on mode
    fast_mode = args.mode == 'fast'
    agent_cls = AgentSFast if fast_mode else AgentS2
    agent = agent_cls(
        platform=current_platform,
        enable_takeover=args.enable_takeover,
        enable_search=not args.disable_search,
    )
    logger.info("Running in FAST mode" if fast_mode
                else "Running in NORMAL mode with full agent")
    run_agent_func = run_agent_fast if fast_mode else run_agent_normal

    # Log whether user takeover is enabled
    logger.info("User takeover functionality is ENABLED" if args.enable_takeover
                else "User takeover functionality is DISABLED")

    # Log whether web search is enabled
    logger.info("Web search functionality is DISABLED" if args.disable_search
                else "Web search functionality is ENABLED")

    # Initialize hardware interface. Note the backend receives the host OS
    # name (platform_os), not the possibly overridden current_platform.
    backend_kwargs = {"platform": platform_os}
    if args.lybic_sid is None:
        logger.info("Using Lybic SID from environment variable LYBIC_PRECREATE_SID")
    else:
        backend_kwargs["precreate_sid"] = args.lybic_sid
        logger.info(f"Using Lybic SID from command line: {args.lybic_sid}")

    hwi = HardwareInterface(backend=args.backend, **backend_kwargs)

    # if query is provided, run the agent on the query
    if args.query:
        agent.reset()
        run_agent_func(agent, args.query, hwi, args.max_steps,
                       args.enable_takeover)
        return

    # Otherwise keep prompting until the user declines another query.
    keep_going = True
    while keep_going:
        query = input("Query: ")

        agent.reset()

        # Run the agent on your own device
        run_agent_func(agent, query, hwi, args.max_steps, args.enable_takeover)

        response = input("Would you like to provide another query? (y/n): ")
        keep_going = response.lower() == "y"
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
# Script entry point: only runs main() when executed directly, not on import.
if __name__ == "__main__":
    # The string below is a no-op expression kept as usage examples.
    """
    python gui_agents/cli_app.py --backend lybic
    python gui_agents/cli_app.py --backend pyautogui --mode fast
    python gui_agents/cli_app.py --backend pyautogui_vmware
    python gui_agents/cli_app.py --backend lybic --max-steps 15
    python gui_agents/cli_app.py --backend lybic --mode fast --enable-takeover
    python gui_agents/cli_app.py --backend lybic --disable-search
    python gui_agents/cli_app.py --backend pyautogui --mode fast --disable-search
    python gui_agents/cli_app.py --backend lybic --lybic-sid SBX-01K1X6ZKAERXAN73KTJ1XXJXAF
    python gui_agents/cli_app.py --backend lybic --mode fast --lybic-sid SBX-01K1X6ZKAERXAN73KTJ1XXJXAF
    """
    main()
|
|
File without changes
|