lybic-guiagents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (85) hide show
  1. desktop_env/__init__.py +1 -0
  2. desktop_env/actions.py +203 -0
  3. desktop_env/controllers/__init__.py +0 -0
  4. desktop_env/controllers/python.py +471 -0
  5. desktop_env/controllers/setup.py +882 -0
  6. desktop_env/desktop_env.py +509 -0
  7. desktop_env/evaluators/__init__.py +5 -0
  8. desktop_env/evaluators/getters/__init__.py +41 -0
  9. desktop_env/evaluators/getters/calc.py +15 -0
  10. desktop_env/evaluators/getters/chrome.py +1774 -0
  11. desktop_env/evaluators/getters/file.py +154 -0
  12. desktop_env/evaluators/getters/general.py +42 -0
  13. desktop_env/evaluators/getters/gimp.py +38 -0
  14. desktop_env/evaluators/getters/impress.py +126 -0
  15. desktop_env/evaluators/getters/info.py +24 -0
  16. desktop_env/evaluators/getters/misc.py +406 -0
  17. desktop_env/evaluators/getters/replay.py +20 -0
  18. desktop_env/evaluators/getters/vlc.py +86 -0
  19. desktop_env/evaluators/getters/vscode.py +35 -0
  20. desktop_env/evaluators/metrics/__init__.py +160 -0
  21. desktop_env/evaluators/metrics/basic_os.py +68 -0
  22. desktop_env/evaluators/metrics/chrome.py +493 -0
  23. desktop_env/evaluators/metrics/docs.py +1011 -0
  24. desktop_env/evaluators/metrics/general.py +665 -0
  25. desktop_env/evaluators/metrics/gimp.py +637 -0
  26. desktop_env/evaluators/metrics/libreoffice.py +28 -0
  27. desktop_env/evaluators/metrics/others.py +92 -0
  28. desktop_env/evaluators/metrics/pdf.py +31 -0
  29. desktop_env/evaluators/metrics/slides.py +957 -0
  30. desktop_env/evaluators/metrics/table.py +585 -0
  31. desktop_env/evaluators/metrics/thunderbird.py +176 -0
  32. desktop_env/evaluators/metrics/utils.py +719 -0
  33. desktop_env/evaluators/metrics/vlc.py +524 -0
  34. desktop_env/evaluators/metrics/vscode.py +283 -0
  35. desktop_env/providers/__init__.py +35 -0
  36. desktop_env/providers/aws/__init__.py +0 -0
  37. desktop_env/providers/aws/manager.py +278 -0
  38. desktop_env/providers/aws/provider.py +186 -0
  39. desktop_env/providers/aws/provider_with_proxy.py +315 -0
  40. desktop_env/providers/aws/proxy_pool.py +193 -0
  41. desktop_env/providers/azure/__init__.py +0 -0
  42. desktop_env/providers/azure/manager.py +87 -0
  43. desktop_env/providers/azure/provider.py +207 -0
  44. desktop_env/providers/base.py +97 -0
  45. desktop_env/providers/gcp/__init__.py +0 -0
  46. desktop_env/providers/gcp/manager.py +0 -0
  47. desktop_env/providers/gcp/provider.py +0 -0
  48. desktop_env/providers/virtualbox/__init__.py +0 -0
  49. desktop_env/providers/virtualbox/manager.py +463 -0
  50. desktop_env/providers/virtualbox/provider.py +124 -0
  51. desktop_env/providers/vmware/__init__.py +0 -0
  52. desktop_env/providers/vmware/manager.py +455 -0
  53. desktop_env/providers/vmware/provider.py +105 -0
  54. gui_agents/__init__.py +0 -0
  55. gui_agents/agents/Action.py +209 -0
  56. gui_agents/agents/__init__.py +0 -0
  57. gui_agents/agents/agent_s.py +832 -0
  58. gui_agents/agents/global_state.py +610 -0
  59. gui_agents/agents/grounding.py +651 -0
  60. gui_agents/agents/hardware_interface.py +129 -0
  61. gui_agents/agents/manager.py +568 -0
  62. gui_agents/agents/translator.py +132 -0
  63. gui_agents/agents/worker.py +355 -0
  64. gui_agents/cli_app.py +560 -0
  65. gui_agents/core/__init__.py +0 -0
  66. gui_agents/core/engine.py +1496 -0
  67. gui_agents/core/knowledge.py +449 -0
  68. gui_agents/core/mllm.py +555 -0
  69. gui_agents/tools/__init__.py +0 -0
  70. gui_agents/tools/tools.py +727 -0
  71. gui_agents/unit_test/__init__.py +0 -0
  72. gui_agents/unit_test/run_tests.py +65 -0
  73. gui_agents/unit_test/test_manager.py +330 -0
  74. gui_agents/unit_test/test_worker.py +269 -0
  75. gui_agents/utils/__init__.py +0 -0
  76. gui_agents/utils/analyze_display.py +301 -0
  77. gui_agents/utils/common_utils.py +263 -0
  78. gui_agents/utils/display_viewer.py +281 -0
  79. gui_agents/utils/embedding_manager.py +53 -0
  80. gui_agents/utils/image_axis_utils.py +27 -0
  81. lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
  82. lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
  83. lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
  84. lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Display Viewer - Used to display operation records in display.json file in chronological order
4
+
5
+ Usage:
6
+ python -m gui_agents.utils.display_viewer [--file /path/to/display.json | --dir /path/to/runs] [--output text|json] [--filter module1,module2]
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import json
12
+ import argparse
13
+ import datetime
14
+ from pathlib import Path
15
+ from typing import Dict, List, Any, Optional, Tuple
16
+
17
+
18
def load_display_json(file_path: str) -> Dict:
    """
    Read and parse a display.json file.

    The file is decoded as UTF-8 first. If that raises UnicodeDecodeError, a
    warning is printed and the file is read again as GB2312.

    Args:
        file_path: Path to display.json file

    Returns:
        Parsed JSON data

    Note:
        Any failure (missing file, invalid JSON, or any other exception)
        prints an error message and terminates the process via sys.exit(1).
    """

    def _parse_with(encoding: str) -> Dict:
        # Open the file with the requested encoding and parse it as JSON.
        with open(file_path, 'r', encoding=encoding) as handle:
            return json.load(handle)

    try:
        try:
            return _parse_with('utf-8')
        except UnicodeDecodeError:
            print(
                f"Warning: Failed to decode '{file_path}' with utf-8, retrying with GB2312..."
            )
            # A decode failure on this second attempt is reported by the
            # generic handler below.
            return _parse_with('gb2312')
    except FileNotFoundError:
        print(f"Error: File '{file_path}' does not exist")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: File '{file_path}' is not a valid JSON format")
        sys.exit(1)
    except Exception as e:
        print(f"Error: An error occurred while reading file '{file_path}': {e}")
        sys.exit(1)
47
+
48
+
49
def flatten_operations(data: Dict) -> List[Dict]:
    """
    Flatten all module operation records into a time-sorted list

    Each returned record is a shallow copy of the input record with a
    "module" key set to the name of the module it was listed under, so the
    caller's `data` is left unmodified.

    Args:
        data: display.json data; records are read from data["operations"],
            a mapping of module name to a list of operation dicts

    Returns:
        List of operation records sorted by their "timestamp" value.
        Records whose timestamp is missing or None sort as 0. Returns an
        empty list when `data` has no "operations" key.
    """
    if "operations" not in data:
        return []

    all_operations = [
        # Copy instead of mutating the record owned by the caller.
        {**op, "module": module}
        for module, operations in data["operations"].items()
        # Tolerate a module whose record list is null/empty.
        for op in operations or ()
    ]

    # `or 0` also covers an explicit null timestamp, which would otherwise
    # make the comparison between None and a number raise TypeError.
    all_operations.sort(key=lambda op: op.get("timestamp") or 0)

    return all_operations
74
+
75
+
76
def format_timestamp(timestamp: float) -> str:
    """
    Render a UNIX timestamp as a local date-time string with millisecond precision

    Args:
        timestamp: UNIX timestamp

    Returns:
        String of the form "YYYY-MM-DD HH:MM:SS.mmm"
    """
    moment = datetime.datetime.fromtimestamp(timestamp)
    # Microseconds are truncated (not rounded) to three digits.
    millis = moment.microsecond // 1000
    return moment.strftime("%Y-%m-%d %H:%M:%S") + f".{millis:03d}"
88
+
89
+
90
def format_duration(duration: float) -> str:
    """
    Render a duration using the largest unit that fits

    Args:
        duration: Duration (seconds)

    Returns:
        The duration with two decimals, in microseconds when below 0.001 s,
        in milliseconds when below 1 s, otherwise in seconds
    """
    # (exclusive upper bound in seconds, multiplier, unit suffix)
    sub_second_units = ((0.001, 1000000, "μs"), (1, 1000, "ms"))
    for upper_bound, multiplier, suffix in sub_second_units:
        if duration < upper_bound:
            return f"{duration * multiplier:.2f}{suffix}"
    return f"{duration:.2f}s"
106
+
107
+
108
def format_tokens(tokens: List[int]) -> str:
    """
    Render token counts as a short summary

    Args:
        tokens: [input tokens, output tokens, total tokens]

    Returns:
        "in:<a> out:<b> total:<c>" built from the first three entries, or
        "N/A" when `tokens` is empty/None or has fewer than three entries
    """
    if tokens and len(tokens) >= 3:
        consumed, produced, overall = tokens[0], tokens[1], tokens[2]
        return f"in:{consumed} out:{produced} total:{overall}"

    return "N/A"
122
+
123
+
124
def truncate_text(text: str, max_length: int = 100) -> str:
    """
    Truncate text, add ellipsis when exceeding maximum length

    Args:
        text: Original text. Non-string values (e.g. dict, list, numbers)
            are converted with str() before measuring.
        max_length: Maximum length of the returned string

    Returns:
        "" for any falsy input; the text unchanged when it already fits;
        otherwise the text cut so that, including the trailing "...", it is
        exactly `max_length` characters long. When `max_length` is smaller
        than the ellipsis itself, the text is cut hard to `max_length`
        characters with no ellipsis.
    """
    if not text:
        return ""

    # Any non-string payload is stringified so len() and slicing are defined.
    if not isinstance(text, str):
        text = str(text)

    if len(text) <= max_length:
        return text

    if max_length < 3:
        # No room for "..."; a negative slice bound here would otherwise
        # return a string longer than the requested maximum.
        return text[:max(max_length, 0)]

    return text[:max_length - 3] + "..."
145
+
146
+
147
def find_latest_display_json() -> Optional[str]:
    """
    Find the latest display.json file

    Looks for a "runtime" folder in the current directory, then in the
    parent directory. Its sub-folders are ordered by name (they are expected
    to be timestamps) and searched from the last to the first, so a newest
    folder that has no display.json does not hide results from earlier ones.

    Returns:
        Path to the latest display.json file, or None if not found
    """
    for candidate in (Path("runtime"), Path("..") / "runtime"):
        # is_dir() is already False for a path that does not exist.
        if candidate.is_dir():
            runtime_dir = candidate
            break
    else:
        return None

    # Newest first, relying on folder names sorting chronologically.
    run_dirs = sorted((d for d in runtime_dir.iterdir() if d.is_dir()),
                      reverse=True)
    for run_dir in run_dirs:
        display_file = run_dir / "display.json"
        if display_file.exists():
            return str(display_file)

    return None
177
+
178
+
179
def _parse_module_filter(raw_filter: Optional[str]) -> Optional[set]:
    """Turn the comma-separated --filter value into a set of module names (None means no filtering)."""
    if not raw_filter:
        return None
    return {name.strip() for name in raw_filter.split(",")}


def _render_json(operations: List[Dict], filter_modules: Optional[set]) -> str:
    """Serialize the (optionally filtered) operations as indented JSON."""
    if filter_modules:
        operations = [op for op in operations if op["module"] in filter_modules]
    return json.dumps(operations, indent=2, ensure_ascii=False)


def _render_text(operations: List[Dict], filter_modules: Optional[set]) -> str:
    """Render the (optionally filtered) operations as a human-readable listing."""
    output_lines = []
    # Numbering follows the position in the full chronological list, so a
    # filtered listing keeps each record's original sequence number.
    for index, op in enumerate(operations, start=1):
        module = op["module"]
        if filter_modules and module not in filter_modules:
            continue
        operation = op.get("operation", "unknown")
        timestamp = format_timestamp(op.get("timestamp", 0))
        # Basic information
        output_lines.append(f"{index:3d} | {timestamp} | {module:10} | {operation}")
        # Detailed information, only for the fields that are present
        if "duration" in op:
            output_lines.append(f" └─ Duration: {format_duration(op['duration'])}")
        if "tokens" in op:
            output_lines.append(f" └─ Tokens: {format_tokens(op['tokens'])}")
        if "cost" in op:
            output_lines.append(f" └─ Cost: {op['cost']}")
        if "content" in op:
            output_lines.append(f" └─ Content: {op['content']}")
        if "status" in op:
            output_lines.append(f" └─ Status: {op['status']}")
        output_lines.append("")
    return "\n".join(output_lines)


def _process_display_file(file_path: str, output_format: str,
                          filter_modules: Optional[set]) -> None:
    """Load one display.json, render it, and write the result next to the input file."""
    operations = flatten_operations(load_display_json(file_path))

    if output_format == "json":
        output_content = _render_json(operations, filter_modules)
    else:
        output_content = _render_text(operations, filter_modules)

    # The report always lands in the input file's folder with a .txt suffix,
    # whatever the selected format.
    output_path = Path(file_path).parent / f"display_viewer_output_{output_format}.txt"
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(output_content)
        print(f"Output written to: {output_path}")
    except Exception as e:
        print(f"Error writing output file: {e}")
        sys.exit(1)


def main():
    """
    Command-line entry point.

    Options:
        --file    a single display.json to process
        --dir     a directory walked recursively; every file named
                  display.json below it is processed
        --output  "text" (default) or "json"
        --filter  comma-separated module names to keep

    With neither --file nor --dir, the file returned by
    find_latest_display_json() is used. Each processed file produces a
    display_viewer_output_<format>.txt next to it. Exits with status 1 when
    --file and --dir are combined, when no input file can be determined, or
    when reading/writing fails.
    """
    parser = argparse.ArgumentParser(
        description=
        "Display operation records in display.json file in chronological order")
    parser.add_argument("--file", help="Path to display.json file")
    parser.add_argument("--dir", help="Path to directory containing display.json files (recursive)")
    parser.add_argument("--output",
                        choices=["text", "json"],
                        default="text",
                        help="Output format (default: text)")
    parser.add_argument(
        "--filter",
        help="Modules to filter, separated by commas (e.g., manager,worker)")

    args = parser.parse_args()

    if args.file and args.dir:
        print("Error: --file and --dir cannot be used together")
        sys.exit(1)

    # Parsed once up front rather than once per processed file.
    filter_modules = _parse_module_filter(args.filter)

    if args.dir:
        found_any = False
        for root, _dirs, files in os.walk(args.dir):
            if "display.json" in files:
                found_any = True
                file_path = os.path.join(root, "display.json")
                print(f"Processing: {file_path}")
                _process_display_file(file_path, args.output, filter_modules)
        if not found_any:
            # Previously this case finished without printing anything.
            print(f"Warning: No display.json file found under '{args.dir}'")
        return

    file_path = args.file
    if not file_path:
        file_path = find_latest_display_json()
        if not file_path:
            print(
                "Error: Cannot find display.json file, please specify file path using --file parameter"
            )
            sys.exit(1)
        print(f"Using the latest display.json file: {file_path}")
    _process_display_file(file_path, args.output, filter_modules)
274
+
275
+
276
# Script entry point. Example invocations:
#   python display_viewer.py --file
#   python display_viewer.py --dir
if __name__ == "__main__":
    main()
@@ -0,0 +1,53 @@
1
+ import numpy as np
2
+ from gui_agents.utils.common_utils import (
3
+ load_embeddings,
4
+ save_embeddings,
5
+ )
6
+ import os
7
+
8
+ # List all embeddings' keys and their shapes
9
def list_embeddings(embeddings_path: str):
    """
    Summarize every entry stored in an embeddings file.

    Args:
        embeddings_path: Path to the embeddings file

    Returns:
        A dict mapping each key to {'shape': <array shape>, 'preview': <up
        to the first five flattened values>}. When the file does not exist,
        a message is printed and an empty dict is returned.
    """
    if os.path.exists(embeddings_path):
        summary = {}
        for name, vector in load_embeddings(embeddings_path).items():
            as_array = np.array(vector)
            head = as_array.flatten()[:5].tolist()
            summary[name] = {'shape': as_array.shape, 'preview': head}
        return summary

    print(f"[EmbeddingManager] File not found: {embeddings_path}")
    return {}
19
+
20
+ # Delete a specific embedding by key
21
def delete_embedding(embeddings_path: str, key: str) -> bool:
    """
    Remove a single embedding from an embeddings file.

    Args:
        embeddings_path: Path to the embeddings file
        key: Key of the embedding to remove

    Returns:
        True when the key was removed and the file saved again; False when
        the file does not exist or the key is not present (a message is
        printed in both cases)
    """
    if not os.path.exists(embeddings_path):
        print(f"[EmbeddingManager] File not found: {embeddings_path}")
        return False

    store = load_embeddings(embeddings_path)
    if key in store:
        del store[key]
        # Persist the reduced collection back to the same path.
        save_embeddings(embeddings_path, store)
        print(f"[EmbeddingManager] Deleted embedding for key: {key}")
        return True

    print(f"[EmbeddingManager] Key not found: {key}")
    return False
33
+
34
def delete_empty_shape_embeddings(embeddings_path: str) -> int:
    """
    Purge unusable entries from an embeddings file.

    An entry is removed when, converted with np.array, it has size 0 or
    shape (), or when its value is a string (or a non-empty list whose first
    item is a string) starting with 'Error:'. The file is saved again only
    if something was removed.

    Args:
        embeddings_path: Path to the embeddings file

    Returns:
        Number of entries deleted; 0 when the file does not exist
    """
    if not os.path.exists(embeddings_path):
        print(f"[EmbeddingManager] File not found: {embeddings_path}")
        return 0

    def _is_unusable(value) -> bool:
        # Empty array or zero-dimensional (scalar-like) value.
        as_array = np.array(value)
        if as_array.size == 0 or as_array.shape == ():
            return True
        # List whose first element is an error message.
        if (isinstance(value, list) and value and isinstance(value[0], str)
                and value[0].startswith('Error:')):
            return True
        # Bare error message.
        return isinstance(value, str) and value.startswith('Error:')

    embeddings = load_embeddings(embeddings_path)
    # Collect first: entries cannot be removed while iterating the mapping.
    doomed = [name for name, value in embeddings.items() if _is_unusable(value)]

    for name in doomed:
        del embeddings[name]
        print(f"[EmbeddingManager] Deleted empty or error embedding for key: {name}")

    if doomed:
        save_embeddings(embeddings_path, embeddings)
    return len(doomed)
@@ -0,0 +1,27 @@
1
+ from PIL import Image
2
+
3
+
4
def pad_to_square(image: Image.Image,
                  fill_color=(0, 0, 0),
                  padding: int = 0) -> Image.Image:
    """
    Pad the image to a square first, then extend it by `padding` pixels on
    all four sides.

    Args:
        image: Source image; it is not modified
        fill_color: Color used for every added area
        padding: Extra border width in pixels; values <= 0 add no border

    Returns:
        A new image in the same mode as `image`, with the original content
        centered on a square canvas whose side is the larger of the
        original width/height, plus the optional border
    """
    width, height = image.size
    side = max(width, height)

    if width != height:
        # Center the original on a square canvas.
        squared = Image.new(image.mode, (side, side), fill_color)
        offset = ((side - width) // 2, (side - height) // 2)
        squared.paste(image, offset)
    else:
        squared = image.copy()

    if not padding > 0:
        return squared

    outer_side = squared.size[0] + 2 * padding
    canvas = Image.new(squared.mode, (outer_side, outer_side), fill_color)
    canvas.paste(squared, (padding, padding))
    return canvas