lybic-guiagents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (85) hide show
  1. desktop_env/__init__.py +1 -0
  2. desktop_env/actions.py +203 -0
  3. desktop_env/controllers/__init__.py +0 -0
  4. desktop_env/controllers/python.py +471 -0
  5. desktop_env/controllers/setup.py +882 -0
  6. desktop_env/desktop_env.py +509 -0
  7. desktop_env/evaluators/__init__.py +5 -0
  8. desktop_env/evaluators/getters/__init__.py +41 -0
  9. desktop_env/evaluators/getters/calc.py +15 -0
  10. desktop_env/evaluators/getters/chrome.py +1774 -0
  11. desktop_env/evaluators/getters/file.py +154 -0
  12. desktop_env/evaluators/getters/general.py +42 -0
  13. desktop_env/evaluators/getters/gimp.py +38 -0
  14. desktop_env/evaluators/getters/impress.py +126 -0
  15. desktop_env/evaluators/getters/info.py +24 -0
  16. desktop_env/evaluators/getters/misc.py +406 -0
  17. desktop_env/evaluators/getters/replay.py +20 -0
  18. desktop_env/evaluators/getters/vlc.py +86 -0
  19. desktop_env/evaluators/getters/vscode.py +35 -0
  20. desktop_env/evaluators/metrics/__init__.py +160 -0
  21. desktop_env/evaluators/metrics/basic_os.py +68 -0
  22. desktop_env/evaluators/metrics/chrome.py +493 -0
  23. desktop_env/evaluators/metrics/docs.py +1011 -0
  24. desktop_env/evaluators/metrics/general.py +665 -0
  25. desktop_env/evaluators/metrics/gimp.py +637 -0
  26. desktop_env/evaluators/metrics/libreoffice.py +28 -0
  27. desktop_env/evaluators/metrics/others.py +92 -0
  28. desktop_env/evaluators/metrics/pdf.py +31 -0
  29. desktop_env/evaluators/metrics/slides.py +957 -0
  30. desktop_env/evaluators/metrics/table.py +585 -0
  31. desktop_env/evaluators/metrics/thunderbird.py +176 -0
  32. desktop_env/evaluators/metrics/utils.py +719 -0
  33. desktop_env/evaluators/metrics/vlc.py +524 -0
  34. desktop_env/evaluators/metrics/vscode.py +283 -0
  35. desktop_env/providers/__init__.py +35 -0
  36. desktop_env/providers/aws/__init__.py +0 -0
  37. desktop_env/providers/aws/manager.py +278 -0
  38. desktop_env/providers/aws/provider.py +186 -0
  39. desktop_env/providers/aws/provider_with_proxy.py +315 -0
  40. desktop_env/providers/aws/proxy_pool.py +193 -0
  41. desktop_env/providers/azure/__init__.py +0 -0
  42. desktop_env/providers/azure/manager.py +87 -0
  43. desktop_env/providers/azure/provider.py +207 -0
  44. desktop_env/providers/base.py +97 -0
  45. desktop_env/providers/gcp/__init__.py +0 -0
  46. desktop_env/providers/gcp/manager.py +0 -0
  47. desktop_env/providers/gcp/provider.py +0 -0
  48. desktop_env/providers/virtualbox/__init__.py +0 -0
  49. desktop_env/providers/virtualbox/manager.py +463 -0
  50. desktop_env/providers/virtualbox/provider.py +124 -0
  51. desktop_env/providers/vmware/__init__.py +0 -0
  52. desktop_env/providers/vmware/manager.py +455 -0
  53. desktop_env/providers/vmware/provider.py +105 -0
  54. gui_agents/__init__.py +0 -0
  55. gui_agents/agents/Action.py +209 -0
  56. gui_agents/agents/__init__.py +0 -0
  57. gui_agents/agents/agent_s.py +832 -0
  58. gui_agents/agents/global_state.py +610 -0
  59. gui_agents/agents/grounding.py +651 -0
  60. gui_agents/agents/hardware_interface.py +129 -0
  61. gui_agents/agents/manager.py +568 -0
  62. gui_agents/agents/translator.py +132 -0
  63. gui_agents/agents/worker.py +355 -0
  64. gui_agents/cli_app.py +560 -0
  65. gui_agents/core/__init__.py +0 -0
  66. gui_agents/core/engine.py +1496 -0
  67. gui_agents/core/knowledge.py +449 -0
  68. gui_agents/core/mllm.py +555 -0
  69. gui_agents/tools/__init__.py +0 -0
  70. gui_agents/tools/tools.py +727 -0
  71. gui_agents/unit_test/__init__.py +0 -0
  72. gui_agents/unit_test/run_tests.py +65 -0
  73. gui_agents/unit_test/test_manager.py +330 -0
  74. gui_agents/unit_test/test_worker.py +269 -0
  75. gui_agents/utils/__init__.py +0 -0
  76. gui_agents/utils/analyze_display.py +301 -0
  77. gui_agents/utils/common_utils.py +263 -0
  78. gui_agents/utils/display_viewer.py +281 -0
  79. gui_agents/utils/embedding_manager.py +53 -0
  80. gui_agents/utils/image_axis_utils.py +27 -0
  81. lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
  82. lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
  83. lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
  84. lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
gui_agents/cli_app.py ADDED
@@ -0,0 +1,560 @@
1
+ import argparse
2
+ import datetime
3
+ import io
4
+ import logging
5
+ import os
6
+ import platform
7
+ import pyautogui
8
+ import sys
9
+ import time
10
+ import datetime
11
+ from pathlib import Path
12
+ from dotenv import load_dotenv
13
+ from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
14
+
15
# Load environment variables from a ``.env`` file at import time.
# First choice: a ``.env`` file in the same directory as this module.
env_path = Path(os.path.dirname(os.path.abspath(__file__))) / '.env'
if env_path.exists():
    load_dotenv(dotenv_path=env_path)
else:
    # Fallback: a ``.env`` file one directory above this module's directory.
    # If neither file exists nothing is loaded and no error is raised.
    parent_env_path = Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) / '.env'
    if parent_env_path.exists():
        load_dotenv(dotenv_path=parent_env_path)
22
+
23
+ from PIL import Image
24
+
25
+ # from gui_agents.agents.grounding import OSWorldACI
26
+ from gui_agents.agents.Action import Screenshot
27
+ from gui_agents.agents.agent_s import AgentS2, AgentSFast
28
+
29
+ from gui_agents.store.registry import Registry
30
+ from gui_agents.agents.global_state import GlobalState
31
+ from gui_agents.agents.hardware_interface import HardwareInterface
32
+
33
+ # Import analyze_display functionality
34
+ from gui_agents.utils.analyze_display import analyze_display_json, aggregate_results, format_output_line
35
+
36
# Lower-cased host OS name (e.g. "linux", "darwin", "windows").
# main() rebinds this global to 'windows' when the lybic backend is selected.
current_platform = platform.system().lower()

# NOTE: everything below runs at import time and configures the ROOT logger,
# so importing this module creates the log directory and attaches handlers
# that every logger in the process inherits.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Timestamp identifying this process's run; used as the log sub-directory name.
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Logs are written to runtime/<datetime_str>/ relative to the working directory.
log_dir = "runtime"
os.makedirs(os.path.join(log_dir, datetime_str), exist_ok=True)

# normal.log: INFO and above from all loggers.
file_handler = logging.FileHandler(
    os.path.join(log_dir, datetime_str, "normal.log"), encoding="utf-8"
)
# debug.log: DEBUG and above from all loggers.
debug_handler = logging.FileHandler(
    os.path.join(log_dir, datetime_str, "debug.log"), encoding="utf-8"
)
# Console output: INFO and above, restricted by the name filter added below.
stdout_handler = logging.StreamHandler(sys.stdout)
# sdebug.log: DEBUG and above, restricted by the name filter added below.
sdebug_handler = logging.FileHandler(
    os.path.join(log_dir, datetime_str, "sdebug.log"), encoding="utf-8"
)

file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)

# The format embeds ANSI colour escape sequences; because the same formatter
# is shared by the file handlers, those escape codes end up in the log files too.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
)
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)

# logging.Filter("desktopenv") only passes records emitted by the logger named
# "desktopenv" or one of its children ("desktopenv.*"). Records logged through
# the root ``logger`` in this module are therefore NOT shown on stdout nor
# written to sdebug.log; they only reach normal.log and debug.log.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)

# Host OS name with its original capitalisation (e.g. "Linux", "Darwin");
# main() passes it to the hardware backend as ``platform``.
platform_os = platform.system()
79
+
80
+
81
def auto_analyze_execution(timestamp_dir: str):
    """
    Summarise a finished run from the ``display.json`` file in its log directory.

    The function polls until ``display.json`` exists and its size is unchanged
    across one polling interval (or until the wait budget is used up), then
    runs ``analyze_display_json`` on it and reports the formatted statistics
    both through the logger and on standard output. Any exception raised along
    the way is logged as an error and not propagated.

    Args:
        timestamp_dir: Directory containing the execution logs and display.json
    """
    import time

    try:
        display_json_path = os.path.join(timestamp_dir, "display.json")

        max_wait_time = 10   # total wait budget, in seconds
        wait_interval = 0.5  # pause between two checks, in seconds
        waited_time = 0

        while waited_time < max_wait_time:
            if not os.path.exists(display_json_path):
                logger.info(f"Waiting for display.json file to be created... ({waited_time:.1f}s)")
                time.sleep(wait_interval)
                waited_time += wait_interval
                continue

            # The file exists: sample its size twice to see whether a writer
            # is still appending to it.
            try:
                size_before = os.path.getsize(display_json_path)
                time.sleep(wait_interval)
                size_after = os.path.getsize(display_json_path)
            except OSError:
                # File might be temporarily inaccessible; back off and retry.
                time.sleep(wait_interval)
                waited_time += wait_interval
                continue

            if size_before == size_after:
                logger.info(f"Display.json file appears to be complete (size: {size_before} bytes)")
                break

            logger.info(f"Display.json file still being written (size changed from {size_before} to {size_after} bytes)")
            waited_time += wait_interval

        if not os.path.exists(display_json_path):
            logger.warning(f"Display.json file not found at: {display_json_path} after waiting {max_wait_time} seconds")
            return

        logger.info(f"Auto-analyzing execution statistics from: {display_json_path}")
        result = analyze_display_json(display_json_path)
        if not result:
            logger.warning("No valid data found in display.json for analysis")
            return

        separator = "=" * 80
        report = [
            separator,
            "EXECUTION STATISTICS:",
            "Steps, Duration (seconds), (Input Tokens, Output Tokens, Total Tokens), Cost",
            separator,
            format_output_line(result),
            separator,
        ]

        # Log the whole report first ...
        for report_line in report:
            logger.info(report_line)

        # ... then echo it to the console for immediate visibility, with a
        # leading blank line before the first separator.
        print("\n" + report[0])
        for report_line in report[1:]:
            print(report_line)

    except Exception as e:
        logger.error(f"Error during auto-analysis: {e}")
155
+
156
+
157
def show_permission_dialog(code: str, action_description: str):
    """Show a platform-specific permission dialog and return True if approved.

    On macOS the dialog is shown with ``osascript``; on Linux with ``zenity``.
    The dialog tool is started directly from an argument list (no shell), so
    shell metacharacters in ``code`` or ``action_description`` are displayed
    as text instead of being interpreted as commands.

    Args:
        code: The action/code the caller wants to execute; shown to the user.
        action_description: Human-readable description of what the action
            does. Only the macOS dialog displays it.

    Returns:
        True only when the dialog tool exits with status 0. False when the
        user declines, on any other platform, or when the dialog tool cannot
        be launched.
    """
    import subprocess

    def _applescript_escape(text) -> str:
        # Backslash and double quote are the characters that would otherwise
        # terminate or alter the AppleScript string literal built below.
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    system = platform.system()
    if system == "Darwin":
        prompt = (
            "Do you want to execute this action?\n\n"
            f"{_applescript_escape(code)} which will try to "
            f"{_applescript_escape(action_description)}"
        )
        argv = [
            "osascript",
            "-e",
            f'display dialog "{prompt}" with title "Action Permission" '
            'buttons {"Cancel", "OK"} default button "OK" cancel button "Cancel"',
        ]
    elif system == "Linux":
        argv = [
            "zenity",
            "--question",
            "--title=Action Permission",
            f"--text=Do you want to execute this action?\n\n{code}",
            "--width=400",
            "--height=200",
        ]
    else:
        # No dialog implementation for this platform: never auto-approve.
        return False

    try:
        return subprocess.run(argv, check=False).returncode == 0
    except OSError:
        # Dialog tool missing or not executable: treat as "not approved".
        return False
170
+
171
+
172
def scale_screenshot_dimensions(screenshot: Image.Image, hwi_para: HardwareInterface):
    """Return the screenshot, resized to the screen size for the PyAutoGUI backend.

    When ``hwi_para.backend`` is a ``PyAutoGUIBackend`` and the size reported
    by ``pyautogui.size()`` differs from the screenshot's own size, the image
    is resampled (LANCZOS) to the reported size. In every other case the
    screenshot is returned unchanged.

    Args:
        screenshot: The captured image.
        hwi_para: Hardware interface whose ``backend`` attribute is inspected.

    Returns:
        The original image or a resized copy of it.
    """
    image_height, image_width = screenshot.height, screenshot.width

    if not isinstance(hwi_para.backend, PyAutoGUIBackend):
        return screenshot

    screen_width, screen_height = pyautogui.size()
    if (screen_width, screen_height) == (image_width, image_height):
        return screenshot

    return screenshot.resize((screen_width, screen_height), Image.Resampling.LANCZOS)
181
+
182
def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_steps: int = 50, enable_takeover: bool = False):
    """Run the full agent on one instruction with a predict/execute loop.

    For at most ``max_steps`` iterations the function takes a screenshot via
    the hardware interface, stores it in the global state, asks
    ``agent.predict`` for the next action and then acts on the lower-cased
    ``type`` of the first returned action:

    * contains ``done`` or ``fail``: notify the user, update the agent's
      narrative memory with the trajectory and stop;
    * contains ``next``: go straight to the next iteration;
    * contains ``wait``: sleep 5 seconds, then continue;
    * contains ``usertakeover``: if ``enable_takeover`` is set, mark the run
      as stopped and notify the user (the next iteration blocks until the
      user types ``continue`` on the terminal); otherwise log and continue;
    * anything else: dispatch the action through ``hwi_para.dispatchDict``
      and append the step's reflection and plan to the trajectory.

    Afterwards the total duration is logged and ``auto_analyze_execution`` is
    run on this process's log directory.

    Native dialogs are started from argument lists without a shell, so text
    produced by the agent (the takeover message) cannot inject shell commands.

    Args:
        agent: Object providing ``predict``, ``update_narrative_memory`` and
            ``update_episodic_memory``.
        instruction: The task description given to the agent.
        hwi_para: Hardware interface used for screenshots and action dispatch.
        max_steps: Upper bound on loop iterations.
        enable_takeover: Whether ``usertakeover`` actions pause execution.
    """
    import subprocess
    import time

    def _show_dialog(argv):
        # Best-effort notification: a missing dialog tool must not abort the run.
        try:
            subprocess.run(argv, check=False)
        except OSError as exc:
            logger.warning(f"Could not display dialog via {argv[0]}: {exc}")

    def _applescript_escape(text) -> str:
        # Keep agent-provided text inside the AppleScript string literal.
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    system = platform.system()
    traj = "Task:\n" + instruction
    subtask_traj = ""
    global_state: GlobalState = Registry.get("GlobalStateStore")  # type: ignore
    global_state.set_Tu(instruction)
    global_state.set_running_state("running")
    hwi = hwi_para

    total_start_time = time.time()
    for _ in range(max_steps):
        # Block here while a takeover is pending; only the literal input
        # 'continue' resumes execution.
        while global_state.get_running_state() == "stopped":
            user_input = input(
                "Agent execution is paused. Enter 'continue' to resume: ")
            if user_input == "continue":
                global_state.set_running_state("running")
                logger.info("Agent execution resumed by user")
                break
            time.sleep(0.5)

        screenshot: Image.Image = hwi.dispatch(Screenshot())  # type: ignore
        global_state.set_screenshot(
            scale_screenshot_dimensions(screenshot, hwi_para))  # type: ignore
        obs = global_state.get_obs_for_manager()

        predict_start = time.time()
        info, code = agent.predict(instruction=instruction, observation=obs)
        predict_time = time.time() - predict_start
        logger.info(
            f"[Step Timing] agent.predict execution time: {predict_time:.2f} seconds"
        )

        global_state.log_operation(module="agent",
                                   operation="agent.predict",
                                   data={"duration": predict_time})

        action = code[0]
        action_type = action["type"].lower()

        if "done" in action_type or "fail" in action_type:
            # Report the actual outcome instead of always claiming completion.
            outcome = "Task Completed" if "done" in action_type else "Task Failed"
            if system == "Darwin":
                _show_dialog([
                    "osascript", "-e",
                    f'display dialog "{outcome}" with title "OpenACI Agent" buttons "OK" default button "OK"',
                ])
            elif system == "Linux":
                _show_dialog([
                    "zenity", "--info", "--title=OpenACI Agent",
                    f"--text={outcome}", "--width=200", "--height=100",
                ])

            agent.update_narrative_memory(traj)
            break

        if "next" in action_type:
            continue

        if "wait" in action_type:
            time.sleep(5)
            continue

        if "usertakeover" in action_type:
            if not enable_takeover:
                logger.info(
                    "User takeover request received but takeover is disabled. Continuing execution."
                )
                continue

            message = action.get("message", "need user takeover")
            logger.info(f"User takeover request: {message}")

            global_state.set_running_state("stopped")

            if system == "Darwin":
                _show_dialog([
                    "osascript", "-e",
                    f'display dialog "{_applescript_escape(message)}" with title "User takeover request" buttons "Continue" default button "Continue"',
                ])
            elif system == "Linux":
                _show_dialog([
                    "zenity", "--info", "--title=User takeover request",
                    f"--text={message}", "--width=300", "--height=150",
                ])

            logger.info("Agent execution paused waiting for user takeover")
            continue

        time.sleep(1.0)
        logger.info(f"EXECUTING CODE: {action}")

        step_dispatch_start = time.time()
        hwi.dispatchDict(action)
        step_dispatch_time = time.time() - step_dispatch_start
        logger.info(
            f"[Step Timing] hwi.dispatchDict execution time: {step_dispatch_time:.2f} seconds"
        )
        logger.info("HARDWARE INTERFACE: Executed")

        # Record executed code and time
        global_state.log_operation(module="hardware",
                                   operation="executing_code",
                                   data={"content": str(action)})
        global_state.log_operation(module="hardware",
                                   operation="hwi.dispatchDict",
                                   data={"duration": step_dispatch_time})

        time.sleep(1.0)

        # Update task and subtask trajectories and optionally the episodic memory.
        # Both fields are passed through str() so a non-string value cannot
        # break the concatenation.
        traj += ("\n\nReflection:\n" + str(info.get("reflection", "")) +
                 "\n\n----------------------\n\nPlan:\n" +
                 str(info.get("executor_plan", "")))
        subtask_traj = agent.update_episodic_memory(info, subtask_traj)

    total_end_time = time.time()
    total_duration = total_end_time - total_start_time
    logger.info(
        f"[Total Timing] Total execution time for this task: {total_duration:.2f} seconds"
    )
    global_state.log_operation(module="other",
                               operation="total_execution_time",
                               data={"duration": total_duration})

    # Auto-analyze execution statistics after task completion
    timestamp_dir = os.path.join(log_dir, datetime_str)
    auto_analyze_execution(timestamp_dir)
303
+
304
+
305
def run_agent_fast(agent,
                   instruction: str,
                   hwi_para: HardwareInterface,
                   max_steps: int = 50,
                   enable_takeover: bool = False):
    """Run the fast agent on one instruction with a predict/execute loop.

    For at most ``max_steps`` iterations the function takes a screenshot via
    the hardware interface, stores it in the global state, asks
    ``agent.predict`` for the next action and then acts on the lower-cased
    ``type`` of the first returned action:

    * contains ``done`` or ``fail``: log the outcome, notify the user, stop;
    * contains ``wait``: sleep for the action's ``duration`` (milliseconds,
      default 5000), then continue;
    * contains ``usertakeover``: if ``enable_takeover`` is set, mark the run
      as stopped and notify the user (the next iteration blocks until the
      user types ``continue`` on the terminal); otherwise log and continue;
    * anything else: dispatch the action through ``hwi_para.dispatchDict``.

    Afterwards the total duration is logged and ``auto_analyze_execution`` is
    run on this process's log directory.

    Native dialogs are started from argument lists without a shell, so text
    produced by the agent (the takeover message) cannot inject shell commands.

    Args:
        agent: Object providing ``predict``.
        instruction: The task description given to the agent.
        hwi_para: Hardware interface used for screenshots and action dispatch.
        max_steps: Upper bound on loop iterations.
        enable_takeover: Whether ``usertakeover`` actions pause execution.
    """
    import subprocess
    import time

    def _show_dialog(argv):
        # Best-effort notification: a missing dialog tool must not abort the run.
        try:
            subprocess.run(argv, check=False)
        except OSError as exc:
            logger.warning(f"[Fast Mode] Could not display dialog via {argv[0]}: {exc}")

    def _applescript_escape(text) -> str:
        # Keep agent-provided text inside the AppleScript string literal.
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    system = platform.system()
    global_state: GlobalState = Registry.get("GlobalStateStore")  # type: ignore
    global_state.set_Tu(instruction)
    global_state.set_running_state("running")
    hwi = hwi_para

    total_start_time = time.time()
    for step in range(max_steps):
        # Block here while a takeover is pending; only the literal input
        # 'continue' resumes execution.
        while global_state.get_running_state() == "stopped":
            user_input = input(
                "Agent execution is paused. Enter 'continue' to resume: ")
            if user_input == "continue":
                global_state.set_running_state("running")
                logger.info("[Fast Mode] Agent execution resumed by user")
                break
            time.sleep(0.5)

        screenshot: Image.Image = hwi.dispatch(Screenshot())  # type: ignore
        global_state.set_screenshot(
            scale_screenshot_dimensions(screenshot, hwi_para))  # type: ignore
        obs = global_state.get_obs_for_manager()

        predict_start = time.time()
        info, code = agent.predict(instruction=instruction,
                                   observation=obs)
        predict_time = time.time() - predict_start
        logger.info(
            f"[Fast Mode] [Step {step+1}] Prediction time: {predict_time:.2f} seconds"
        )

        global_state.log_operation(module="agent_fast",
                                   operation="agent.predict_fast",
                                   data={
                                       "duration": predict_time,
                                       "step": step + 1
                                   })

        action = code[0]
        action_type = action["type"].lower()

        if "done" in action_type or "fail" in action_type:
            succeeded = "done" in action_type
            logger.info(
                f"[Fast Mode] Task {'completed' if succeeded else 'failed'}"
            )
            # Keep the dialog consistent with the log line above instead of
            # always claiming completion.
            outcome = "Task Completed" if succeeded else "Task Failed"
            if system == "Darwin":
                _show_dialog([
                    "osascript", "-e",
                    f'display dialog "{outcome}" with title "OpenACI Agent (Fast)" buttons "OK" default button "OK"',
                ])
            elif system == "Linux":
                _show_dialog([
                    "zenity", "--info", "--title=OpenACI Agent (Fast)",
                    f"--text={outcome}", "--width=200", "--height=100",
                ])
            break

        if "wait" in action_type:
            # ``duration`` is given in milliseconds; sleep() takes seconds.
            wait_duration = action.get("duration", 5000) / 1000
            logger.info(f"[Fast Mode] Waiting for {wait_duration} seconds")
            time.sleep(wait_duration)
            continue

        if "usertakeover" in action_type:
            if not enable_takeover:
                logger.info(
                    "[Fast Mode] User takeover request received but takeover is disabled. Continuing execution."
                )
                continue

            message = action.get("message", "need user takeover")
            logger.info(f"[Fast Mode] User takeover request: {message}")

            global_state.set_running_state("stopped")

            if system == "Darwin":
                _show_dialog([
                    "osascript", "-e",
                    f'display dialog "{_applescript_escape(message)}" with title "User takeover request (Fast)" buttons "Continue" default button "Continue"',
                ])
            elif system == "Linux":
                _show_dialog([
                    "zenity", "--info",
                    "--title=User takeover request (Fast)",
                    f"--text={message}", "--width=300", "--height=150",
                ])

            logger.info(
                "[Fast Mode] Agent execution paused waiting for user takeover")
            continue

        logger.info(f"[Fast Mode] Executing action: {action}")
        step_dispatch_start = time.time()
        hwi.dispatchDict(action)
        step_dispatch_time = time.time() - step_dispatch_start
        logger.info(
            f"[Fast Mode] Action execution time: {step_dispatch_time:.2f} seconds"
        )

        global_state.log_operation(module="hardware_fast",
                                   operation="executing_code_fast",
                                   data={
                                       "content": str(action),
                                       "duration": step_dispatch_time,
                                       "step": step + 1
                                   })

        time.sleep(0.5)

    total_end_time = time.time()
    total_duration = total_end_time - total_start_time
    logger.info(
        f"[Fast Mode] Total execution time: {total_duration:.2f} seconds")
    global_state.log_operation(module="other",
                               operation="total_execution_time_fast",
                               data={"duration": total_duration})

    # Auto-analyze execution statistics after task completion
    timestamp_dir = os.path.join(log_dir, datetime_str)
    auto_analyze_execution(timestamp_dir)
422
+
423
+
424
def main():
    """Command-line entry point: parse options, build the agent, run queries.

    Steps performed:
      1. Parse the CLI options (backend, query, max steps, mode, takeover,
         search toggle, Lybic sandbox id).
      2. Create this run's cache/state directories and register a
         ``GlobalState`` under the name "GlobalStateStore".
      3. Build ``AgentSFast`` (fast mode) or ``AgentS2`` (normal mode); the
         agent platform is forced to 'windows' when the backend is 'lybic'.
      4. Build the ``HardwareInterface`` for the chosen backend.
      5. Run the single ``--query`` if one was given, otherwise prompt for
         queries interactively until the user answers anything but "y".
    """
    global current_platform

    cli = argparse.ArgumentParser(description='GUI Agent CLI Application')
    cli.add_argument('--backend', type=str, default='lybic',
                     help='Backend to use (e.g., lybic, pyautogui, pyautogui_vmware)')
    cli.add_argument('--query', type=str, default='',
                     help='Initial query to execute')
    cli.add_argument('--max-steps', type=int, default=50,
                     help='Maximum number of steps to execute (default: 50)')
    cli.add_argument('--mode', type=str, default='normal',
                     choices=['normal', 'fast'],
                     help='Agent mode: normal or fast (default: normal)')
    cli.add_argument('--enable-takeover', action='store_true',
                     help='Enable user takeover functionality')
    cli.add_argument('--disable-search', action='store_true',
                     help='Disable web search functionality (default: enabled)')
    cli.add_argument('--lybic-sid', type=str, default=None,
                     help='Lybic precreated sandbox ID (if not provided, will use LYBIC_PRECREATE_SID environment variable)')
    args = cli.parse_args()

    # Per-run directory layout: runtime/<timestamp>/{cache/screens,state}
    run_dir = os.path.join(log_dir, datetime_str)
    screens_dir = os.path.join(run_dir, "cache", "screens")
    state_dir = os.path.join(run_dir, "state")
    for directory in (screens_dir, state_dir):
        os.makedirs(directory, exist_ok=True)

    def in_state_dir(filename):
        return os.path.join(state_dir, filename)

    state_store = GlobalState(
        screenshot_dir=screens_dir,
        tu_path=in_state_dir("tu.json"),
        search_query_path=in_state_dir("search_query.json"),
        completed_subtasks_path=in_state_dir("completed_subtasks.json"),
        failed_subtasks_path=in_state_dir("failed_subtasks.json"),
        remaining_subtasks_path=in_state_dir("remaining_subtasks.json"),
        termination_flag_path=in_state_dir("termination_flag.json"),
        running_state_path=in_state_dir("running_state.json"),
        display_info_path=os.path.join(run_dir, "display.json"),
        agent_log_path=os.path.join(run_dir, "agent_log.json"),
    )
    Registry.register("GlobalStateStore", state_store)

    # Set platform to Windows if backend is lybic
    if args.backend == 'lybic':
        current_platform = 'windows'

    # Pick the agent class and its matching run loop from the mode.
    search_enabled = not args.disable_search
    if args.mode == 'fast':
        agent_cls = AgentSFast
        run_agent_func = run_agent_fast
        mode_message = "Running in FAST mode"
    else:
        agent_cls = AgentS2
        run_agent_func = run_agent_normal
        mode_message = "Running in NORMAL mode with full agent"

    agent = agent_cls(
        platform=current_platform,
        enable_takeover=args.enable_takeover,
        enable_search=search_enabled,
    )
    logger.info(mode_message)

    # Log whether user takeover and web search are enabled
    logger.info("User takeover functionality is ENABLED"
                if args.enable_takeover
                else "User takeover functionality is DISABLED")
    logger.info("Web search functionality is ENABLED"
                if search_enabled
                else "Web search functionality is DISABLED")

    # Initialize hardware interface
    backend_kwargs = {"platform": platform_os}
    if args.lybic_sid is None:
        logger.info("Using Lybic SID from environment variable LYBIC_PRECREATE_SID")
    else:
        backend_kwargs["precreate_sid"] = args.lybic_sid
        logger.info(f"Using Lybic SID from command line: {args.lybic_sid}")

    hwi = HardwareInterface(backend=args.backend, **backend_kwargs)

    # One-shot mode: a query was supplied on the command line.
    if args.query:
        agent.reset()
        run_agent_func(agent, args.query, hwi, args.max_steps,
                       args.enable_takeover)
        return

    # Interactive mode: keep asking for queries until the user declines.
    another_query = True
    while another_query:
        query = input("Query: ")

        agent.reset()

        # Run the agent on your own device
        run_agent_func(agent, query, hwi, args.max_steps, args.enable_takeover)

        answer = input("Would you like to provide another query? (y/n): ")
        another_query = answer.lower() == "y"
546
+
547
+
548
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # The string literal below is a bare expression statement with no runtime
    # effect; it is kept purely as a list of example invocations.
    """
    python gui_agents/cli_app.py --backend lybic
    python gui_agents/cli_app.py --backend pyautogui --mode fast
    python gui_agents/cli_app.py --backend pyautogui_vmware
    python gui_agents/cli_app.py --backend lybic --max-steps 15
    python gui_agents/cli_app.py --backend lybic --mode fast --enable-takeover
    python gui_agents/cli_app.py --backend lybic --disable-search
    python gui_agents/cli_app.py --backend pyautogui --mode fast --disable-search
    python gui_agents/cli_app.py --backend lybic --lybic-sid SBX-01K1X6ZKAERXAN73KTJ1XXJXAF
    python gui_agents/cli_app.py --backend lybic --mode fast --lybic-sid SBX-01K1X6ZKAERXAN73KTJ1XXJXAF
    """
    main()
File without changes