lybic-guiagents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (85)
  1. desktop_env/__init__.py +1 -0
  2. desktop_env/actions.py +203 -0
  3. desktop_env/controllers/__init__.py +0 -0
  4. desktop_env/controllers/python.py +471 -0
  5. desktop_env/controllers/setup.py +882 -0
  6. desktop_env/desktop_env.py +509 -0
  7. desktop_env/evaluators/__init__.py +5 -0
  8. desktop_env/evaluators/getters/__init__.py +41 -0
  9. desktop_env/evaluators/getters/calc.py +15 -0
  10. desktop_env/evaluators/getters/chrome.py +1774 -0
  11. desktop_env/evaluators/getters/file.py +154 -0
  12. desktop_env/evaluators/getters/general.py +42 -0
  13. desktop_env/evaluators/getters/gimp.py +38 -0
  14. desktop_env/evaluators/getters/impress.py +126 -0
  15. desktop_env/evaluators/getters/info.py +24 -0
  16. desktop_env/evaluators/getters/misc.py +406 -0
  17. desktop_env/evaluators/getters/replay.py +20 -0
  18. desktop_env/evaluators/getters/vlc.py +86 -0
  19. desktop_env/evaluators/getters/vscode.py +35 -0
  20. desktop_env/evaluators/metrics/__init__.py +160 -0
  21. desktop_env/evaluators/metrics/basic_os.py +68 -0
  22. desktop_env/evaluators/metrics/chrome.py +493 -0
  23. desktop_env/evaluators/metrics/docs.py +1011 -0
  24. desktop_env/evaluators/metrics/general.py +665 -0
  25. desktop_env/evaluators/metrics/gimp.py +637 -0
  26. desktop_env/evaluators/metrics/libreoffice.py +28 -0
  27. desktop_env/evaluators/metrics/others.py +92 -0
  28. desktop_env/evaluators/metrics/pdf.py +31 -0
  29. desktop_env/evaluators/metrics/slides.py +957 -0
  30. desktop_env/evaluators/metrics/table.py +585 -0
  31. desktop_env/evaluators/metrics/thunderbird.py +176 -0
  32. desktop_env/evaluators/metrics/utils.py +719 -0
  33. desktop_env/evaluators/metrics/vlc.py +524 -0
  34. desktop_env/evaluators/metrics/vscode.py +283 -0
  35. desktop_env/providers/__init__.py +35 -0
  36. desktop_env/providers/aws/__init__.py +0 -0
  37. desktop_env/providers/aws/manager.py +278 -0
  38. desktop_env/providers/aws/provider.py +186 -0
  39. desktop_env/providers/aws/provider_with_proxy.py +315 -0
  40. desktop_env/providers/aws/proxy_pool.py +193 -0
  41. desktop_env/providers/azure/__init__.py +0 -0
  42. desktop_env/providers/azure/manager.py +87 -0
  43. desktop_env/providers/azure/provider.py +207 -0
  44. desktop_env/providers/base.py +97 -0
  45. desktop_env/providers/gcp/__init__.py +0 -0
  46. desktop_env/providers/gcp/manager.py +0 -0
  47. desktop_env/providers/gcp/provider.py +0 -0
  48. desktop_env/providers/virtualbox/__init__.py +0 -0
  49. desktop_env/providers/virtualbox/manager.py +463 -0
  50. desktop_env/providers/virtualbox/provider.py +124 -0
  51. desktop_env/providers/vmware/__init__.py +0 -0
  52. desktop_env/providers/vmware/manager.py +455 -0
  53. desktop_env/providers/vmware/provider.py +105 -0
  54. gui_agents/__init__.py +0 -0
  55. gui_agents/agents/Action.py +209 -0
  56. gui_agents/agents/__init__.py +0 -0
  57. gui_agents/agents/agent_s.py +832 -0
  58. gui_agents/agents/global_state.py +610 -0
  59. gui_agents/agents/grounding.py +651 -0
  60. gui_agents/agents/hardware_interface.py +129 -0
  61. gui_agents/agents/manager.py +568 -0
  62. gui_agents/agents/translator.py +132 -0
  63. gui_agents/agents/worker.py +355 -0
  64. gui_agents/cli_app.py +560 -0
  65. gui_agents/core/__init__.py +0 -0
  66. gui_agents/core/engine.py +1496 -0
  67. gui_agents/core/knowledge.py +449 -0
  68. gui_agents/core/mllm.py +555 -0
  69. gui_agents/tools/__init__.py +0 -0
  70. gui_agents/tools/tools.py +727 -0
  71. gui_agents/unit_test/__init__.py +0 -0
  72. gui_agents/unit_test/run_tests.py +65 -0
  73. gui_agents/unit_test/test_manager.py +330 -0
  74. gui_agents/unit_test/test_worker.py +269 -0
  75. gui_agents/utils/__init__.py +0 -0
  76. gui_agents/utils/analyze_display.py +301 -0
  77. gui_agents/utils/common_utils.py +263 -0
  78. gui_agents/utils/display_viewer.py +281 -0
  79. gui_agents/utils/embedding_manager.py +53 -0
  80. gui_agents/utils/image_axis_utils.py +27 -0
  81. lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
  82. lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
  83. lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
  84. lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,610 @@
1
+ # global_state.py
2
+ import json, os, time, logging, io
3
+ from pathlib import Path
4
+ from typing import List, Optional, Dict, Any
5
+
6
+ from PIL import Image
7
+
8
+ from gui_agents.utils.common_utils import Node
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
# ========= File Lock Tools =========
from contextlib import contextmanager

def _open_lock_target(path: Path, mode: str):
    """Open *path* for locking; text modes are forced to UTF-8 for portability."""
    if "b" in mode:
        return open(path, mode)
    return open(path, mode, encoding="utf-8")

if os.name == "nt":
    import msvcrt, time as _t

    @contextmanager
    def locked(path: Path, mode: str):
        """Yield an open handle on *path* while holding an exclusive 1-byte lock.

        Uses non-blocking msvcrt locks and polls until the lock is granted.
        """
        handle = _open_lock_target(path, mode)
        try:
            # Poll the non-blocking lock until another process releases it.
            while True:
                try:
                    msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
                except OSError:
                    _t.sleep(0.01)
                else:
                    break
            yield handle
        finally:
            # msvcrt locks are positional: rewind so byte 0 is the one released.
            handle.seek(0)
            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
            handle.close()
else:
    import fcntl

    @contextmanager
    def locked(path: Path, mode: str):
        """Yield an open handle on *path* while holding an exclusive flock."""
        handle = _open_lock_target(path, mode)
        try:
            fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
            yield handle
        finally:
            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
            handle.close()
52
+
53
# ========= Node Encoding/Decoding =========
def node_to_dict(node: Node):
    """Serialise *node* to a plain dict, preferring its own ``to_dict`` hook."""
    serializer = getattr(node, "to_dict", None)
    if serializer is not None:
        return serializer()  # type: ignore
    return vars(node)
59
+
60
+ def node_from_dict(d: dict) -> Node:
61
+ if hasattr(Node, "from_dict"):
62
+ return Node.from_dict(d) # type: ignore
63
+ return Node(**d) # type: ignore
64
+
65
# ========= Safe JSON Operations =========
def safe_json_dump(data: Any, file_handle, **kwargs) -> None:
    """
    Safely dump JSON data with proper encoding handling.

    Serialises to a string first and validates that the string is encodable
    for the target stream *before* writing. The previous implementation
    called ``json.dump`` directly and retried in ASCII mode on
    ``UnicodeEncodeError`` — but by then part of the document had already
    been written, so the fallback was appended after partial output,
    corrupting the file. Writing a single pre-validated string fixes that.
    """
    # Ensure UTF-8 encoding and ASCII fallback for problematic characters
    kwargs.setdefault('ensure_ascii', False)
    kwargs.setdefault('indent', 2)

    text = json.dumps(data, **kwargs)
    # In-memory streams (StringIO) have no .encoding; treat them as UTF-8.
    target_encoding = getattr(file_handle, 'encoding', None) or 'utf-8'
    try:
        text.encode(target_encoding)
    except UnicodeEncodeError as e:
        logger.warning(
            f"UnicodeEncodeError during JSON dump: {e}. Falling back to ASCII mode."
        )
        # Fallback to ASCII mode if UTF-8 fails
        kwargs['ensure_ascii'] = True
        text = json.dumps(data, **kwargs)
    file_handle.write(text)
83
+
84
+
85
def safe_json_load(file_handle) -> Any:
    """
    Safely load JSON data with proper encoding handling.

    On a decode failure the stream is rewound and re-read under a few
    common legacy encodings; if every attempt fails an empty dict is
    returned instead of raising.
    """
    try:
        return json.load(file_handle)
    except UnicodeDecodeError as e:
        logger.warning(
            f"UnicodeDecodeError during JSON load: {e}. Attempting recovery.")
        file_handle.seek(0)
        raw = file_handle.read()

        # Retry with common legacy encodings (raw str payloads pass through).
        for candidate in ('utf-8-sig', 'latin1', 'cp1252'):
            try:
                text = raw.decode(candidate) if isinstance(raw, bytes) else raw
                return json.loads(text)
            except (UnicodeDecodeError, json.JSONDecodeError):
                continue

        logger.error(
            "Failed to decode JSON with all attempted encodings. Returning empty data."
        )
        return {}
114
+
115
+
116
# ========= Safe File Operations =========
def safe_write_text(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8; on encode failure, retry with
    unencodable characters replaced rather than losing the whole write."""
    try:
        path.write_text(content, encoding='utf-8')
    except UnicodeEncodeError as e:
        logger.warning(
            f"UnicodeEncodeError writing to {path}: {e}. Using error handling.")
        # Degrade gracefully: substitute replacement chars for bad code points.
        path.write_text(content, encoding='utf-8', errors='replace')
128
+
129
+
130
def safe_read_text(path: Path) -> str:
    """Read *path* as text: UTF-8 first, then a few legacy encodings, and as a
    last resort UTF-8 with replacement characters (never raises on decode)."""
    try:
        return path.read_text(encoding='utf-8')
    except UnicodeDecodeError as e:
        logger.warning(
            f"UnicodeDecodeError reading {path}: {e}. Trying alternative encodings."
        )
        for candidate in ('utf-8-sig', 'latin1', 'cp1252', 'gbk'):
            try:
                return path.read_text(encoding=candidate)
            except UnicodeDecodeError:
                continue

        logger.error(
            f"Failed to decode {path} with all encodings. Using error replacement."
        )
        return path.read_text(encoding='utf-8', errors='replace')
152
+
153
+
154
# ========= GlobalState =========
class GlobalState:
    """Centralized management for global state (screenshots / instructions / subtask lists, etc.) read and write.

    All JSON-backed setters follow the same durability pattern: write to a
    sibling ``.tmp`` file under an exclusive lock, flush + fsync, then
    atomically ``replace`` the real file, so readers never observe a
    half-written document.
    """

    def __init__(
        self,
        *,
        screenshot_dir: str,
        tu_path: str,
        search_query_path: str,
        completed_subtasks_path: str,
        failed_subtasks_path: str,
        remaining_subtasks_path: str,
        termination_flag_path: str,
        running_state_path: str,
        agent_log_path: str,
        display_info_path: str = "",  # New parameter for storing display information
    ):
        self.screenshot_dir = Path(screenshot_dir)
        self.tu_path = Path(tu_path)
        self.search_query_path = Path(search_query_path)
        self.completed_subtasks_path = Path(completed_subtasks_path)
        self.failed_subtasks_path = Path(failed_subtasks_path)
        self.remaining_subtasks_path = Path(remaining_subtasks_path)
        self.termination_flag_path = Path(termination_flag_path)
        self.running_state_path = Path(running_state_path)
        self.agent_log_path = Path(agent_log_path)

        # If display_info_path is not provided, create display.json in the same directory as running_state_path
        if not display_info_path:
            self.display_info_path = Path(
                os.path.join(self.running_state_path.parent, "display.json"))
        else:
            self.display_info_path = Path(display_info_path)

        # Ensure necessary directories / files exist
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)
        for p in [
                self.tu_path,
                self.search_query_path,
                self.completed_subtasks_path,
                self.failed_subtasks_path,
                self.remaining_subtasks_path,
                self.termination_flag_path,
                self.running_state_path,
                self.display_info_path,
                self.agent_log_path,
        ]:
            if not p.exists():
                p.parent.mkdir(parents=True, exist_ok=True)
                # Seed each file with the empty value its readers expect:
                # JSON list, JSON object, or blank text.
                if p in [
                        self.completed_subtasks_path, self.failed_subtasks_path,
                        self.remaining_subtasks_path, self.agent_log_path
                ]:
                    safe_write_text(p, "[]")
                elif p in [self.display_info_path]:
                    safe_write_text(p, "{}")
                else:
                    safe_write_text(p, "")

    # ---------- Common Private Tools ----------
    def _load_subtasks(self, path: Path) -> List[Node]:
        """Load a JSON list of serialised nodes from *path*; [] on any failure."""
        try:
            with locked(path, "r") as f:
                data = safe_json_load(f)
                return [node_from_dict(d) for d in data]
        except Exception as e:
            logger.warning(f"Failed to load subtasks from {path}: {e}")
            return []

    def _save_subtasks(self, path: Path, nodes: List[Node]) -> None:
        """Atomically persist *nodes* to *path* (tmp file + fsync + replace)."""
        tmp = path.with_suffix(".tmp")
        serialised = [node_to_dict(n) for n in nodes]
        try:
            with locked(tmp, "w") as f:
                safe_json_dump(serialised, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(path)
        except Exception as e:
            logger.error(f"Failed to save subtasks to {path}: {e}")
            # Clean up temp file if it exists
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ---------- Screenshot ----------
    def get_screenshot(self) -> Optional[bytes]:
        """Return the newest screenshot in the directory as PNG bytes, or None."""
        pngs = sorted(self.screenshot_dir.glob("*.png"))
        if not pngs:
            logger.warning("No screenshot found in %s", self.screenshot_dir)
            return None
        latest = pngs[-1]
        buf = io.BytesIO()
        # Fix: close the PIL image handle via a context manager instead of
        # leaking the underlying open file.
        with Image.open(latest) as screenshot:
            screenshot.save(buf, format="PNG")
        return buf.getvalue()

    def set_screenshot(self, img: Image.Image) -> Path:
        """Save *img* under a millisecond-timestamp name and return its path."""
        ts = int(time.time() * 1000)
        out = self.screenshot_dir / f"{ts}.png"
        img.save(out)
        logger.debug("Screenshot saved to %s", out)
        return out

    def get_screen_size(self) -> List[int]:
        """Return [width, height] of the newest screenshot; [1920, 1080] fallback."""
        pngs = sorted(self.screenshot_dir.glob("*.png"))
        if not pngs:
            logger.warning(
                "No screenshot found in %s, returning default size [1920, 1080]",
                self.screenshot_dir)
            return [1920, 1080]

        latest = pngs[-1]
        try:
            # Fix: close the PIL image handle via a context manager instead of
            # leaking the underlying open file.
            with Image.open(latest) as screenshot:
                width, height = screenshot.size
            logger.info("Current screen size from %s: [%d, %d]", latest.name,
                        width, height)
            return [width, height]
        except Exception as e:
            logger.error("Failed to get screen size from %s: %s", latest, e)
            return [1920, 1080]

    # ---------- Tu ----------
    def get_Tu(self) -> str:
        """Return the stored user instruction, or "" if missing/unreadable."""
        try:
            with locked(self.tu_path, "r") as f:
                data = safe_json_load(f)
                return data.get("instruction", "") if isinstance(data, dict) else ""
        except Exception as e:
            logger.warning(f"Failed to get Tu from {self.tu_path}: {e}")
            return ""

    def set_Tu(self, instruction: str) -> None:
        """Atomically persist the user instruction."""
        tmp = self.tu_path.with_suffix(".tmp")
        try:
            with locked(tmp, "w") as f:
                safe_json_dump({"instruction": instruction},
                               f,
                               ensure_ascii=False,
                               indent=2)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(self.tu_path)
        except Exception as e:
            logger.error(f"Failed to set Tu to {self.tu_path}: {e}")
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ---------- search_query ----------
    def get_search_query(self) -> str:
        """Return the stored search query, or "" if missing/unreadable."""
        try:
            with locked(self.search_query_path, "r") as f:
                data = safe_json_load(f)
                return data.get("query", "") if isinstance(data, dict) else ""
        except Exception as e:
            logger.warning(
                f"Failed to get search query from {self.search_query_path}: {e}"
            )
            return ""

    def set_search_query(self, query: str) -> None:
        """Atomically persist the search query."""
        tmp = self.search_query_path.with_suffix(".tmp")
        try:
            with locked(tmp, "w") as f:
                safe_json_dump({"query": query},
                               f,
                               ensure_ascii=False,
                               indent=2)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(self.search_query_path)
        except Exception as e:
            logger.error(
                f"Failed to set search query to {self.search_query_path}: {e}")
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ====== completed_subtasks ======
    def get_completed_subtasks(self) -> List[Node]:
        """Return the completed-subtask list."""
        return self._load_subtasks(self.completed_subtasks_path)

    def set_completed_subtasks(self, nodes: List[Node]) -> None:
        """Overwrite the completed-subtask list."""
        self._save_subtasks(self.completed_subtasks_path, nodes)

    def add_completed_subtask(self, node: Node) -> None:
        """Append one node to the completed-subtask list."""
        lst = self.get_completed_subtasks()
        lst.append(node)
        self._save_subtasks(self.completed_subtasks_path, lst)

    # ====== failed_subtasks ======
    def get_failed_subtasks(self) -> List[Node]:
        """Return the failed-subtask list."""
        return self._load_subtasks(self.failed_subtasks_path)

    def set_failed_subtasks(self, nodes: List[Node]) -> None:
        """Overwrite the failed-subtask list."""
        self._save_subtasks(self.failed_subtasks_path, nodes)

    def add_failed_subtask(self, node: Node) -> None:
        """Append one node to the failed-subtask list."""
        lst = self.get_failed_subtasks()
        lst.append(node)
        self._save_subtasks(self.failed_subtasks_path, lst)

    def get_latest_failed_subtask(self) -> Optional[Node]:
        """Return the most recently recorded failed subtask, or None."""
        lst = self.get_failed_subtasks()
        return lst[-1] if lst else None

    # ====== agent_log ======
    def get_agent_log(self) -> List[Dict[str, Any]]:
        """Return the agent log entries; [] if missing or not a list."""
        try:
            with locked(self.agent_log_path, "r") as f:
                data = safe_json_load(f)
                logger.debug("Agent log loaded")
                return data if isinstance(data, list) else []
        except Exception as e:
            logger.warning(
                f"Failed to load agent log from {self.agent_log_path}: {e}")
            return []

    def add_agent_log(self, log_entry: Dict[str, Any]) -> None:
        """Append *log_entry* (auto-assigning a 1-based id) and persist atomically."""
        log_list = self.get_agent_log()

        log_entry["id"] = len(log_list) + 1
        log_list.append(log_entry)

        # Log to console
        log_type = log_entry.get("type", "N/A").capitalize()
        log_content = log_entry.get("content", "")
        logger.debug(f"[Agent Log - {log_type}] {log_content}")

        tmp = self.agent_log_path.with_suffix(".tmp")
        try:
            with locked(tmp, "w") as f:
                safe_json_dump(log_list, f, ensure_ascii=False, indent=2)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(self.agent_log_path)
        except Exception as e:
            logger.error(
                f"Failed to add agent log to {self.agent_log_path}: {e}")
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ====== remaining_subtasks ======
    def get_remaining_subtasks(self) -> List[Node]:
        """Return the remaining-subtask list."""
        return self._load_subtasks(self.remaining_subtasks_path)

    def set_remaining_subtasks(self, nodes: List[Node]) -> None:
        """Overwrite the remaining-subtask list."""
        self._save_subtasks(self.remaining_subtasks_path, nodes)

    def add_remaining_subtask(self, node: Node) -> None:
        """Append one node to the remaining-subtask list."""
        lst = self.get_remaining_subtasks()
        lst.append(node)
        self._save_subtasks(self.remaining_subtasks_path, lst)

    # ---------- termination_flag ----------
    def get_termination_flag(self) -> str:
        """Return "terminated"/"not_terminated"; defaults to "not_terminated"."""
        try:
            with locked(self.termination_flag_path, "r") as f:
                data = safe_json_load(f)
                return data if isinstance(data, str) else "not_terminated"
        except Exception as e:
            logger.warning(
                f"Failed to get termination flag from {self.termination_flag_path}: {e}"
            )
            return "not_terminated"

    def set_termination_flag(self, flag: str) -> None:
        """Atomically persist the termination flag (must be a valid value)."""
        assert flag in {"terminated", "not_terminated"}
        tmp = self.termination_flag_path.with_suffix(".tmp")
        try:
            with locked(tmp, "w") as f:
                safe_json_dump(flag, f)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(self.termination_flag_path)
        except Exception as e:
            logger.error(
                f"Failed to set termination flag to {self.termination_flag_path}: {e}"
            )
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ---------- running_state ----------
    def get_running_state(self) -> str:
        """Return "running"/"stopped"; defaults to "stopped"."""
        try:
            with locked(self.running_state_path, "r") as f:
                data = safe_json_load(f)
                return data if isinstance(data, str) else "stopped"
        except Exception as e:
            logger.warning(
                f"Failed to get running state from {self.running_state_path}: {e}"
            )
            return "stopped"

    def set_running_state(self, state: str) -> None:
        """Atomically persist the running state (must be a valid value)."""
        assert state in {"running", "stopped"}
        tmp = self.running_state_path.with_suffix(".tmp")
        try:
            with locked(tmp, "w") as f:
                safe_json_dump(state, f)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(self.running_state_path)
        except Exception as e:
            logger.error(
                f"Failed to set running state to {self.running_state_path}: {e}"
            )
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ---------- High-level Wrappers ----------
    def get_obs_for_manager(self):
        """Bundle the observation fields the manager consumes."""
        return {
            "screenshot": self.get_screenshot(),
            "termination_flag": self.get_termination_flag(),
        }

    def get_obs_for_grounding(self):
        """Bundle the observation fields the grounding module consumes."""
        return {"screenshot": self.get_screenshot()}

    def get_obs_for_evaluator(self):
        """Bundle the observation fields the evaluator consumes."""
        return {
            "search_query": self.get_search_query(),
            "failed_subtasks": self.get_failed_subtasks(),
            "completed_subtasks": self.get_completed_subtasks(),
            "remaining_subtasks": self.get_remaining_subtasks(),
            "screenshot": self.get_screenshot(),
        }

    # ---------- Display Information Management ----------
    def get_display_info(self) -> Dict[str, Any]:
        """Get display information"""
        try:
            with locked(self.display_info_path, "r") as f:
                content = f.read().strip()
                if not content:
                    return {}
                return json.loads(content)
        except Exception as e:
            logger.warning(
                f"Failed to load display info from {self.display_info_path}: {e}"
            )
            return {}

    def set_display_info(self, info: Dict[str, Any]) -> None:
        """Set display information (overwrite)"""
        tmp = self.display_info_path.with_suffix(".tmp")
        try:
            with locked(tmp, "w") as f:
                safe_json_dump(info, f, ensure_ascii=False, indent=2)
                f.flush()
                os.fsync(f.fileno())
            tmp.replace(self.display_info_path)
        except Exception as e:
            logger.error(
                f"Failed to set display info to {self.display_info_path}: {e}")
            if tmp.exists():
                try:
                    tmp.unlink()
                except Exception:
                    pass
            raise

    # ---------- New Unified Logging Method ----------
    def log_operation(self, module: str, operation: str,
                      data: Dict[str, Any]) -> None:
        """
        Log operation information, organized by module and chronological order

        Args:
            module: Module name, such as 'manager', 'worker', 'grounding', etc.
            operation: Operation name, such as 'formulate_query', 'retrieve_knowledge', etc.
            data: Operation-related data, may include the following fields:
                - duration: Operation duration (seconds)
                - tokens: Token usage [input tokens, output tokens, total tokens]
                - cost: Cost information
                - content: Operation content or result
                - Other custom fields
        """
        try:
            info = self.get_display_info()

            # Ensure the module exists
            if "operations" not in info:
                info["operations"] = {}

            if module not in info["operations"]:
                info["operations"][module] = []

            # Normalize operation name, remove prefixes like "Manager.", "Worker.", etc.
            normalized_operation = operation
            for prefix in ["Manager.", "Worker.", "Hardware."]:
                if normalized_operation.startswith(prefix):
                    normalized_operation = normalized_operation[len(prefix):]
                    break

            # Find if there's an existing record for the same operation
            found = False
            for i, op in enumerate(info["operations"][module]):
                # Normalize existing operation name
                existing_op_name = op["operation"]
                for prefix in ["Manager.", "Worker.", "Hardware."]:
                    if existing_op_name.startswith(prefix):
                        existing_op_name = existing_op_name[len(prefix):]
                        break

                # If found the same operation and timestamp is close (within 5 seconds), merge the data
                if (existing_op_name == normalized_operation or
                        op["operation"] == operation) and \
                        abs(op["timestamp"] - time.time()) < 5.0:
                    # Merge data, keep original timestamp
                    for key, value in data.items():
                        op[key] = value
                    found = True
                    break

            # If no matching operation found, create new record
            if not found:
                # Add timestamp and operation name
                operation_data = {
                    "operation": operation,
                    "timestamp": time.time(),
                    **data
                }

                # Add to the operation list of the corresponding module
                info["operations"][module].append(operation_data)

            self.set_display_info(info)
        except Exception as e:
            logger.error(f"Failed to log operation {module}.{operation}: {e}")
            # Don't raise the exception to avoid breaking the main flow