lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (38) hide show
  1. gui_agents/__init__.py +63 -0
  2. gui_agents/agents/Action.py +3 -3
  3. gui_agents/agents/Backend/ADBBackend.py +62 -0
  4. gui_agents/agents/Backend/Backend.py +28 -0
  5. gui_agents/agents/Backend/LybicBackend.py +354 -0
  6. gui_agents/agents/Backend/PyAutoGUIBackend.py +183 -0
  7. gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py +250 -0
  8. gui_agents/agents/Backend/__init__.py +0 -0
  9. gui_agents/agents/agent_s.py +0 -2
  10. gui_agents/agents/grounding.py +1 -6
  11. gui_agents/agents/hardware_interface.py +24 -7
  12. gui_agents/agents/manager.py +0 -3
  13. gui_agents/agents/translator.py +1 -1
  14. gui_agents/agents/worker.py +1 -2
  15. gui_agents/cli_app.py +143 -8
  16. gui_agents/core/engine.py +0 -2
  17. gui_agents/core/knowledge.py +0 -2
  18. gui_agents/lybic_client/__init__.py +0 -0
  19. gui_agents/lybic_client/lybic_client.py +88 -0
  20. gui_agents/prompts/__init__.py +0 -0
  21. gui_agents/prompts/prompts.py +869 -0
  22. gui_agents/service/__init__.py +19 -0
  23. gui_agents/service/agent_service.py +527 -0
  24. gui_agents/service/api_models.py +136 -0
  25. gui_agents/service/config.py +241 -0
  26. gui_agents/service/exceptions.py +35 -0
  27. gui_agents/store/__init__.py +0 -0
  28. gui_agents/store/registry.py +22 -0
  29. gui_agents/tools/tools.py +0 -4
  30. gui_agents/unit_test/test_manager.py +0 -2
  31. gui_agents/unit_test/test_worker.py +0 -2
  32. gui_agents/utils/analyze_display.py +1 -1
  33. gui_agents/utils/common_utils.py +0 -2
  34. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/METADATA +203 -75
  35. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/RECORD +38 -21
  36. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/WHEEL +0 -0
  37. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/licenses/LICENSE +0 -0
  38. {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/top_level.txt +0 -0
gui_agents/__init__.py CHANGED
@@ -0,0 +1,63 @@
1
+ """
2
+ GUI Agents - A comprehensive GUI automation framework
3
+
4
+ This package provides both low-level agent components and a high-level service interface
5
+ for GUI automation tasks across different platforms and backends.
6
+
7
+ Main Components:
8
+ - AgentService: High-level service interface (recommended for most users)
9
+ - AgentS2, AgentSFast: Core agent implementations
10
+ - HardwareInterface: Hardware abstraction layer
11
+ - ServiceConfig: Configuration management
12
+
13
+ Quick Start:
14
+ from gui_agents import AgentService
15
+
16
+ service = AgentService()
17
+ result = service.execute_task("Take a screenshot")
18
+ print(f"Task completed: {result.status}")
19
+ """
20
+
21
+ # High-level service interface (recommended)
22
+ from .service import (
23
+ AgentService,
24
+ ServiceConfig,
25
+ TaskRequest,
26
+ TaskResult,
27
+ TaskStatus,
28
+ ExecutionStats,
29
+ AgentServiceError,
30
+ ConfigurationError,
31
+ TaskExecutionError
32
+ )
33
+
34
+ # Core agent classes (for advanced users)
35
+ from .agents.agent_s import AgentS2, AgentSFast
36
+ from .agents.hardware_interface import HardwareInterface
37
+ from .store.registry import Registry
38
+ from .agents.global_state import GlobalState
39
+
40
+ __version__ = "0.2.1"
41
+
42
+ # Primary exports (what users should typically use)
43
+ __all__ = [
44
+ # High-level service interface
45
+ "AgentService",
46
+ "ServiceConfig",
47
+ "TaskRequest",
48
+ "TaskResult",
49
+ "TaskStatus",
50
+ "ExecutionStats",
51
+
52
+ # Exceptions
53
+ "AgentServiceError",
54
+ "ConfigurationError",
55
+ "TaskExecutionError",
56
+
57
+ # Core classes (for advanced usage)
58
+ "AgentS2",
59
+ "AgentSFast",
60
+ "HardwareInterface",
61
+ "Registry",
62
+ "GlobalState",
63
+ ]
@@ -27,9 +27,9 @@ The registry makes the last line work without an if‑else chain.
27
27
  """
28
28
 
29
29
  from abc import ABC
30
- from dataclasses import dataclass, field, fields, asdict
31
- from enum import Enum, auto
32
- from typing import Any, Dict, List, Tuple, Type, TypeVar, ClassVar
30
+ from dataclasses import dataclass, field, fields
31
+ from enum import Enum
32
+ from typing import Any, Dict, List, Type, TypeVar, ClassVar
33
33
 
34
34
  __all__ = [
35
35
  "Action",
@@ -0,0 +1,62 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 2) Android device backend (ADB)
3
+ # ---------------------------------------------------------------------------
4
+ from gui_agents.agents.Action import (
5
+ Action,
6
+ Click,
7
+ Drag,
8
+ TypeText,
9
+ Scroll,
10
+ Hotkey,
11
+ Wait,
12
+ )
13
+
14
+ from gui_agents.agents.Backend.Backend import Backend
15
+ import time
16
+ import subprocess
17
+
18
class ADBBackend(Backend):
    """Lightweight Android backend that drives a device via ``adb shell input``.

    Supported actions are ``Click`` (tap), ``Drag`` (swipe), ``TypeText``,
    ``Hotkey`` (sent as a single key event) and ``Wait``.
    """

    _supported = {Click, Drag, TypeText, Hotkey, Wait}

    def __init__(self, serial: str | None = None):
        """Remember which device to talk to.

        Args:
            serial: Device serial forwarded to ``adb -s``. ``None`` lets adb
                choose its default device.
        """
        self.serial = serial

    # ------------------------------------------------------------------
    def _shell_prefix(self) -> list:
        """Return the ``adb [-s SERIAL] shell`` argv prefix for this device."""
        prefix = ["adb"]
        if self.serial:
            prefix += ["-s", self.serial]
        prefix.append("shell")
        return prefix

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> None:
        """Translate *action* into an ``adb shell input ...`` call and run it.

        Args:
            action: The action to perform on the device.

        Raises:
            NotImplementedError: If the action type is not in ``_supported``.
            ValueError: If a ``Hotkey`` carries no keys.
            subprocess.CalledProcessError: If the adb command exits non-zero.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} unsupported")

        if isinstance(action, Wait):
            # Prefer ``duration`` (the field the Lybic backend reads); fall
            # back to a legacy ``seconds`` attribute if that is what exists.
            delay = getattr(action, "duration", None)
            if delay is None:
                delay = getattr(action, "seconds", None)
            time.sleep(delay if delay is not None else 0.2)
            return

        prefix = self._shell_prefix()

        if isinstance(action, Click):
            # A bare "adb shell" would open an interactive shell, so the tap
            # coordinates must always be supplied.
            cmd = prefix + ["input", "tap", str(int(action.x)), str(int(action.y))]
        elif isinstance(action, Drag):
            cmd = prefix + [
                "input", "swipe",
                str(int(action.startX)), str(int(action.startY)),
                str(int(action.endX)), str(int(action.endY)),
            ]
        elif isinstance(action, TypeText):
            if not action.text:
                return  # nothing to type; "input text" without an argument fails
            # "input text" treats "%s" as a space.
            # NOTE(review): adb joins argv into one device-side shell command,
            # so shell metacharacters in the text are interpreted on the
            # device -- confirm callers only pass trusted text.
            text = action.text.replace(" ", "%s")
            cmd = prefix + ["input", "text", text]
        elif isinstance(action, Hotkey):
            if not action.keys:
                raise ValueError("Hotkey requires at least one key")
            # Only the first key is sent; key combinations are not mapped.
            key = action.keys[0].upper()
            cmd = prefix + ["input", "keyevent", key]
        else:
            raise NotImplementedError(f"{type(action).__name__} unsupported")

        subprocess.run(cmd, check=True)
@@ -0,0 +1,28 @@
1
+ # Abstract backend base‑class
2
+ # ---------------------------------------------------------------------------
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, List, Type, Dict, Set
5
+ from gui_agents.agents.Action import (
6
+ Action
7
+ )
8
+
9
+
10
class Backend(ABC):
    """Common interface that every platform backend implements."""

    # Action subclasses this backend can execute; concrete backends override it.
    _supported: Set[Type[Action]] = set()

    # ---------------------------------------------------------------------
    def supports(self, action_type: Type[Action]) -> bool:
        """Return ``True`` when *action_type* is listed in ``_supported``."""
        advertised = self._supported
        return action_type in advertised

    # ---------------------------------------------------------------------
    @abstractmethod
    def execute(self, action: Action) -> Any:
        """Turn an *Action* into concrete platform commands.

        Implementations should raise **NotImplementedError** when the type of
        *action* is not in `self._supported`, so that upper layers can decide
        how to degrade or retry.
        """
+
@@ -0,0 +1,354 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 3) Cloud desktop / custom device backend using Official Lybic Python SDK
3
+ # https://lybic.ai/docs/sdk/python
4
+ # ---------------------------------------------------------------------------
5
+ import asyncio
6
+ import logging
7
+ import time
8
+ import os
9
+ from typing import Dict, Any, Optional
10
+ from PIL import Image
11
+
12
+ from gui_agents.agents.Action import (
13
+ Action,
14
+ Click,
15
+ DoubleClick,
16
+ Move,
17
+ Drag,
18
+ TypeText,
19
+ Scroll,
20
+ Hotkey,
21
+ Wait,
22
+ Screenshot,
23
+ Memorize
24
+ )
25
+
26
+ from gui_agents.agents.Backend.Backend import Backend
27
+
28
+ # Import the official Lybic SDK
29
+ try:
30
+ from lybic import LybicClient, Sandbox, ComputerUse, dto
31
+ except ImportError:
32
+ raise ImportError(
33
+ "Lybic Python SDK not found. Please install it with: pip install --upgrade lybic"
34
+ )
35
+
36
+
37
+ log = logging.getLogger(__name__)
38
+
39
+
40
class LybicBackend(Backend):
    """Backend that drives a Lybic sandbox through the official Lybic Python SDK.

    Each supported ``Action`` is converted into the matching SDK DTO and sent
    to the sandbox. The SDK is asynchronous, so every call is run to
    completion on a private event loop owned by this instance.
    """

    _supported = {Click, DoubleClick, Move, Drag, TypeText, Scroll, Hotkey,
                  Wait, Screenshot, Memorize}

    def __init__(self,
                 api_key: Optional[str] = None,
                 org_id: Optional[str] = None,
                 endpoint: Optional[str] = None,
                 timeout: int = 10,
                 extra_headers: Optional[Dict[str, str]] = None,
                 sandbox_opts: Optional[Dict[str, Any]] = None,
                 max_retries: int = 2,
                 precreate_sid: str = '',
                 **kwargs):
        """Create the SDK client and attach to (or create) a sandbox.

        Args:
            api_key: Lybic API key; falls back to env var ``LYBIC_API_KEY``.
            org_id: Lybic organisation id; falls back to ``LYBIC_ORG_ID``.
            endpoint: API endpoint; falls back to ``LYBIC_API_ENDPOINT`` and
                then to ``https://api.lybic.cn``.
            timeout: Request timeout handed to ``LybicClient``.
            extra_headers: Extra HTTP headers handed to ``LybicClient``.
            sandbox_opts: Extra options used when a sandbox is created. The
                dict is copied, never mutated.
            max_retries: Number of retries after the first failed attempt.
            precreate_sid: Id of an existing sandbox; falls back to
                ``LYBIC_PRECREATE_SID``. When empty, a new sandbox is created.
            **kwargs: Accepted for call-site compatibility and ignored.

        Raises:
            ValueError: If no API key or organisation id is available.
            RuntimeError: If a sandbox was created but no id could be read
                from the response.
        """
        self._closed = False
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)

        # Resolve configuration: explicit arguments win over the environment.
        self.api_key = api_key or os.getenv("LYBIC_API_KEY")
        self.org_id = org_id or os.getenv("LYBIC_ORG_ID")
        self.endpoint = endpoint or os.getenv("LYBIC_API_ENDPOINT", "https://api.lybic.cn")
        self.timeout = timeout
        self.extra_headers = extra_headers
        self.max_retries = max_retries
        self.precreate_sid = precreate_sid or os.getenv("LYBIC_PRECREATE_SID", "")

        if not (self.api_key and self.org_id):
            raise ValueError("LYBIC_API_KEY and LYBIC_ORG_ID are required. Please set them as environment variables or pass them as arguments.")

        self.client = LybicClient(
            org_id=self.org_id,
            api_key=self.api_key,
            endpoint=self.endpoint,
            timeout=self.timeout,
            extra_headers=self.extra_headers or {}
        )

        # SDK components bound to the client.
        self.sandbox_manager = Sandbox(self.client)
        self.computer_use = ComputerUse(self.client)

        self.sandbox_id = self.precreate_sid

        # "No sandbox" is represented by an empty string (the default of
        # precreate_sid and of the env fallback), so test for falsiness
        # rather than ``is None``.
        if not self.sandbox_id:
            print("Creating sandbox using official SDK...")
            max_life_seconds = int(os.getenv("LYBIC_MAX_LIFE_SECONDS", "3600"))
            # Work on a copy so the caller's dict is left untouched.
            opts = dict(sandbox_opts or {})
            opts.setdefault("maxLifeSeconds", max_life_seconds)
            # Pop "name" so it is not passed both explicitly and via **opts.
            sandbox_name = opts.pop("name", "agent-run")

            new_sandbox = self.loop.run_until_complete(
                self.sandbox_manager.create(name=sandbox_name, **opts)
            )
            # Use getattr in case the response names the attribute differently.
            self.sandbox_id = getattr(new_sandbox, 'id', "") or getattr(new_sandbox, 'sandbox_id', "")
            if not self.sandbox_id:
                raise RuntimeError(f"Failed to get sandbox ID from response: {new_sandbox}")
            print(f"Created sandbox: {self.sandbox_id}")

    def __del__(self):
        """Best-effort cleanup when the instance is garbage collected."""
        self.close()

    def execute(self, action: Action) -> Any:
        """Execute *action* by translating it into a Lybic SDK call.

        Returns:
            The SDK response for input actions, a PIL image for
            ``Screenshot``, and ``None`` for ``Wait`` and ``Memorize``.

        Raises:
            NotImplementedError: If the action type is not in ``_supported``.
            RuntimeError: If no sandbox id is set, or the SDK call keeps
                failing after all retries.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} unsupported")
        if not self.sandbox_id:
            raise RuntimeError("Sandbox ID is empty; create a sandbox first (precreate_sid or auto-create).")

        if isinstance(action, Click):
            return self._click(action)
        elif isinstance(action, DoubleClick):
            return self._double_click(action)
        elif isinstance(action, Move):
            return self._move(action)
        elif isinstance(action, Drag):
            return self._drag(action)
        elif isinstance(action, TypeText):
            return self._type(action)
        elif isinstance(action, Scroll):
            return self._scroll(action)
        elif isinstance(action, Hotkey):
            return self._hotkey(action)
        elif isinstance(action, Screenshot):
            return self._screenshot()
        elif isinstance(action, Wait):
            duration = action.duration if action.duration is not None else 0.2
            time.sleep(duration)
        elif isinstance(action, Memorize):
            log.info(f"Memorizing information: {action.information}")
        return None

    def _execute_with_retry(self, action_dto: dto.ComputerUseActionDto) -> dto.SandboxActionResponseDto:
        """Send *action_dto* to the sandbox, retrying with linear back-off.

        Raises:
            RuntimeError: If every attempt fails; the last error is chained.
        """
        attempts = max(1, self.max_retries + 1)
        exc: Optional[Exception] = None
        for attempt in range(1, attempts + 1):
            try:
                return self.loop.run_until_complete(
                    self.computer_use.execute_computer_use_action(
                        sandbox_id=self.sandbox_id,
                        data=action_dto
                    )
                )
            except Exception as e:
                exc = e
                log.warning(f"Lybic SDK action failed (try {attempt}/{self.max_retries+1}): {e}")
                # Back off before the next try, but not after the last one.
                if attempt < attempts:
                    time.sleep(0.4 * attempt)

        raise RuntimeError(f"Lybic SDK action failed after {self.max_retries + 1} attempts: {exc}") from exc

    def _run_action(self, sdk_action: Any) -> dto.SandboxActionResponseDto:
        """Wrap an SDK action in a request DTO (no screenshot, no cursor) and execute it."""
        action_dto = dto.ComputerUseActionDto(
            action=sdk_action,
            includeScreenShot=False,
            includeCursorPosition=False
        )
        return self._execute_with_retry(action_dto)

    @staticmethod
    def _hold_key(act: Any) -> str:
        """Return the action's held keys joined by spaces, or "" when there are none."""
        return " ".join(act.holdKey) if act.holdKey else ""

    def _click(self, act: Click) -> dto.SandboxActionResponseDto:
        """Perform a mouse click."""
        return self._run_action(dto.MouseClickAction(
            type="mouse:click",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            # Action uses 0=left, 1=right; the SDK call uses 1=left, 2=right.
            button=1 if act.button == 0 else 2,
            holdKey=self._hold_key(act)
        ))

    def _double_click(self, act: DoubleClick) -> dto.SandboxActionResponseDto:
        """Perform a mouse double click."""
        return self._run_action(dto.MouseDoubleClickAction(
            type="mouse:doubleClick",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            button=1 if act.button == 0 else 2,
            holdKey=self._hold_key(act)
        ))

    def _move(self, act: Move) -> dto.SandboxActionResponseDto:
        """Move the mouse pointer."""
        return self._run_action(dto.MouseMoveAction(
            type="mouse:move",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            holdKey=self._hold_key(act)
        ))

    def _drag(self, act: Drag) -> dto.SandboxActionResponseDto:
        """Drag the mouse from the start point to the end point."""
        return self._run_action(dto.MouseDragAction(
            type="mouse:drag",
            startX=dto.PixelLength(type="px", value=act.startX),
            startY=dto.PixelLength(type="px", value=act.startY),
            endX=dto.PixelLength(type="px", value=act.endX),
            endY=dto.PixelLength(type="px", value=act.endY),
            holdKey=self._hold_key(act)
        ))

    def _type(self, act: TypeText) -> dto.SandboxActionResponseDto:
        """Type text; newlines are sent as Enter key presses."""
        return self._run_action(dto.KeyboardTypeAction(
            type="keyboard:type",
            content=act.text,
            treatNewLineAsEnter=True
        ))

    def _scroll(self, act: Scroll) -> dto.SandboxActionResponseDto:
        """Scroll at a position; a missing step on either axis counts as 0."""
        step_vertical = act.stepVertical if act.stepVertical is not None else 0
        step_horizontal = act.stepHorizontal if act.stepHorizontal is not None else 0

        return self._run_action(dto.MouseScrollAction(
            type="mouse:scroll",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            stepVertical=step_vertical,
            stepHorizontal=step_horizontal,
            holdKey=self._hold_key(act)
        ))

    def _hotkey(self, act: Hotkey) -> dto.SandboxActionResponseDto:
        """Press a key combination.

        Raises:
            ValueError: If ``act.duration`` is set but outside 1..5000.
        """
        duration = 80  # default when the action does not specify one
        if act.duration is not None:
            if not 1 <= act.duration <= 5000:
                raise ValueError("Hotkey duration must be between 1 and 5000")
            duration = act.duration

        # Keys are sent as one lower-cased, space-separated string.
        keys_str = " ".join(act.keys).lower()

        return self._run_action(dto.KeyboardHotkeyAction(
            type="keyboard:hotkey",
            keys=keys_str,
            duration=duration
        ))

    def _screenshot(self) -> Image.Image:
        """Fetch a screenshot of the sandbox and return the image.

        The SDK call yields three values; only the second (the image) is
        returned.

        Raises:
            RuntimeError: If the SDK call fails; the cause is chained.
        """
        try:
            _url, image, _b64_str = self.loop.run_until_complete(
                self.sandbox_manager.get_screenshot(self.sandbox_id)
            )
            return image
        except Exception as e:
            raise RuntimeError(f"Failed to take screenshot: {e}") from e

    def get_sandbox_id(self) -> str:
        """Return the current sandbox id.

        Raises:
            RuntimeError: If no sandbox id is set (it is None or empty).
        """
        if not self.sandbox_id:
            raise RuntimeError("Sandbox ID is not available")
        return self.sandbox_id

    def close(self):
        """Close the SDK client; safe to call more than once.

        Errors are logged rather than raised. The event loop is left open
        because ``__init__`` installed it as the thread's current loop.
        """
        if getattr(self, "_closed", False):
            return
        self._closed = True
        # __init__ may have failed before these attributes were assigned.
        client = getattr(self, "client", None)
        loop = getattr(self, "loop", None)
        if client is None or loop is None or loop.is_closed():
            return
        try:
            loop.run_until_complete(client.close())
        except Exception as e:
            log.warning(f"Error closing Lybic client: {e}")