lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- gui_agents/__init__.py +63 -0
- gui_agents/agents/Action.py +3 -3
- gui_agents/agents/Backend/ADBBackend.py +62 -0
- gui_agents/agents/Backend/Backend.py +28 -0
- gui_agents/agents/Backend/LybicBackend.py +354 -0
- gui_agents/agents/Backend/PyAutoGUIBackend.py +183 -0
- gui_agents/agents/Backend/PyAutoGUIVMwareBackend.py +250 -0
- gui_agents/agents/Backend/__init__.py +0 -0
- gui_agents/agents/agent_s.py +0 -2
- gui_agents/agents/grounding.py +1 -6
- gui_agents/agents/hardware_interface.py +24 -7
- gui_agents/agents/manager.py +0 -3
- gui_agents/agents/translator.py +1 -1
- gui_agents/agents/worker.py +1 -2
- gui_agents/cli_app.py +143 -8
- gui_agents/core/engine.py +0 -2
- gui_agents/core/knowledge.py +0 -2
- gui_agents/lybic_client/__init__.py +0 -0
- gui_agents/lybic_client/lybic_client.py +88 -0
- gui_agents/prompts/__init__.py +0 -0
- gui_agents/prompts/prompts.py +869 -0
- gui_agents/service/__init__.py +19 -0
- gui_agents/service/agent_service.py +527 -0
- gui_agents/service/api_models.py +136 -0
- gui_agents/service/config.py +241 -0
- gui_agents/service/exceptions.py +35 -0
- gui_agents/store/__init__.py +0 -0
- gui_agents/store/registry.py +22 -0
- gui_agents/tools/tools.py +0 -4
- gui_agents/unit_test/test_manager.py +0 -2
- gui_agents/unit_test/test_worker.py +0 -2
- gui_agents/utils/analyze_display.py +1 -1
- gui_agents/utils/common_utils.py +0 -2
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/METADATA +203 -75
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/RECORD +38 -21
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/WHEEL +0 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {lybic_guiagents-0.1.0.dist-info → lybic_guiagents-0.2.1.dist-info}/top_level.txt +0 -0
gui_agents/__init__.py
CHANGED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GUI Agents - A comprehensive GUI automation framework
|
|
3
|
+
|
|
4
|
+
This package provides both low-level agent components and a high-level service interface
|
|
5
|
+
for GUI automation tasks across different platforms and backends.
|
|
6
|
+
|
|
7
|
+
Main Components:
|
|
8
|
+
- AgentService: High-level service interface (recommended for most users)
|
|
9
|
+
- AgentS2, AgentSFast: Core agent implementations
|
|
10
|
+
- HardwareInterface: Hardware abstraction layer
|
|
11
|
+
- ServiceConfig: Configuration management
|
|
12
|
+
|
|
13
|
+
Quick Start:
|
|
14
|
+
from gui_agents import AgentService
|
|
15
|
+
|
|
16
|
+
service = AgentService()
|
|
17
|
+
result = service.execute_task("Take a screenshot")
|
|
18
|
+
print(f"Task completed: {result.status}")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
# High-level service interface (recommended)
|
|
22
|
+
from .service import (
|
|
23
|
+
AgentService,
|
|
24
|
+
ServiceConfig,
|
|
25
|
+
TaskRequest,
|
|
26
|
+
TaskResult,
|
|
27
|
+
TaskStatus,
|
|
28
|
+
ExecutionStats,
|
|
29
|
+
AgentServiceError,
|
|
30
|
+
ConfigurationError,
|
|
31
|
+
TaskExecutionError
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Core agent classes (for advanced users)
|
|
35
|
+
from .agents.agent_s import AgentS2, AgentSFast
|
|
36
|
+
from .agents.hardware_interface import HardwareInterface
|
|
37
|
+
from .store.registry import Registry
|
|
38
|
+
from .agents.global_state import GlobalState
|
|
39
|
+
|
|
40
|
+
__version__ = "0.2.1"
|
|
41
|
+
|
|
42
|
+
# Primary exports (what users should typically use)
|
|
43
|
+
__all__ = [
|
|
44
|
+
# High-level service interface
|
|
45
|
+
"AgentService",
|
|
46
|
+
"ServiceConfig",
|
|
47
|
+
"TaskRequest",
|
|
48
|
+
"TaskResult",
|
|
49
|
+
"TaskStatus",
|
|
50
|
+
"ExecutionStats",
|
|
51
|
+
|
|
52
|
+
# Exceptions
|
|
53
|
+
"AgentServiceError",
|
|
54
|
+
"ConfigurationError",
|
|
55
|
+
"TaskExecutionError",
|
|
56
|
+
|
|
57
|
+
# Core classes (for advanced usage)
|
|
58
|
+
"AgentS2",
|
|
59
|
+
"AgentSFast",
|
|
60
|
+
"HardwareInterface",
|
|
61
|
+
"Registry",
|
|
62
|
+
"GlobalState",
|
|
63
|
+
]
|
gui_agents/agents/Action.py
CHANGED
|
@@ -27,9 +27,9 @@ The registry makes the last line work without an if‑else chain.
|
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
29
|
from abc import ABC
|
|
30
|
-
from dataclasses import dataclass, field, fields
|
|
31
|
-
from enum import Enum
|
|
32
|
-
from typing import Any, Dict, List,
|
|
30
|
+
from dataclasses import dataclass, field, fields
|
|
31
|
+
from enum import Enum
|
|
32
|
+
from typing import Any, Dict, List, Type, TypeVar, ClassVar
|
|
33
33
|
|
|
34
34
|
__all__ = [
|
|
35
35
|
"Action",
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# 2) Android device backend (ADB)
|
|
3
|
+
# ---------------------------------------------------------------------------
|
|
4
|
+
from gui_agents.agents.Action import (
|
|
5
|
+
Action,
|
|
6
|
+
Click,
|
|
7
|
+
Drag,
|
|
8
|
+
TypeText,
|
|
9
|
+
Scroll,
|
|
10
|
+
Hotkey,
|
|
11
|
+
Wait,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from gui_agents.agents.Backend.Backend import Backend
|
|
15
|
+
import time
|
|
16
|
+
import subprocess
|
|
17
|
+
|
|
18
|
+
class ADBBackend(Backend):
    """Very light-weight ADB backend (tap / swipe / text / keyevent).

    Every supported action except ``Wait`` is translated into a single
    ``adb [-s SERIAL] shell input ...`` invocation run via ``subprocess``.
    """

    _supported = {Click, Drag, TypeText, Hotkey, Wait}

    def __init__(self, serial: str | None = None):
        """Remember which device to target.

        Args:
            serial: Device serial passed to ``adb -s``; ``None`` lets adb pick
                its default device.
        """
        self.serial = serial  # specify target device; None = default

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> None:
        """Run *action* on the device through ``adb shell input``.

        Args:
            action: One of the Action types listed in ``_supported``.

        Raises:
            NotImplementedError: If the action type is not supported.
            subprocess.CalledProcessError: If the adb command exits non-zero.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} unsupported")

        if isinstance(action, Wait):
            # NOTE(review): the original read ``action.seconds`` while the
            # Lybic backend reads ``action.duration``; accept either and
            # confirm the real field name (and its unit) against Action.Wait.
            pause = getattr(action, "seconds", None)
            if pause is None:
                pause = getattr(action, "duration", None)
            time.sleep(pause if pause is not None else 0.2)
            return

        prefix = ["adb"]
        if self.serial:
            prefix += ["-s", self.serial]
        prefix.append("shell")

        if isinstance(action, Click):
            # Previously this ran a bare ``adb shell`` (an interactive shell)
            # because the tap command had been commented out.
            # Field names follow the usage in LybicBackend -- TODO confirm.
            cmd = prefix + ["input", "tap", str(action.x), str(action.y)]
        elif isinstance(action, Drag):
            # ``input swipe`` needs start and end coordinates to do anything.
            cmd = prefix + [
                "input", "swipe",
                str(action.startX), str(action.startY),
                str(action.endX), str(action.endY),
            ]
        elif isinstance(action, TypeText):
            # ``input text`` uses %s as its escape for a space.
            # NOTE(review): other shell metacharacters in the text are not
            # escaped before reaching the device shell.
            text = action.text.replace(" ", "%s")
            cmd = prefix + ["input", "text", text]
        elif isinstance(action, Hotkey):
            # Only the first key is sent (demo-level mapping).
            key = action.keys[0].upper()
            cmd = prefix + ["input", "keyevent", key]
        else:
            raise NotImplementedError(f"{type(action).__name__} unsupported")

        subprocess.run(cmd, check=True)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Abstract backend base‑class
|
|
2
|
+
# ---------------------------------------------------------------------------
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any, List, Type, Dict, Set
|
|
5
|
+
from gui_agents.agents.Action import (
|
|
6
|
+
Action
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Backend(ABC):
    """Common interface implemented by every platform back-end."""

    # Action subclasses this back-end is able to execute. The base class
    # advertises none; concrete back-ends replace it with their own set.
    _supported: Set[Type[Action]] = set()

    def supports(self, action_type: Type[Action]) -> bool:
        """Return True when *action_type* is listed in ``_supported``."""
        advertised = self._supported
        return action_type in advertised

    @abstractmethod
    def execute(self, action: Action) -> Any:
        """Turn an *Action* into concrete platform commands.

        Implementations should raise **NotImplementedError** when the type of
        *action* is not in ``self._supported`` so that upper layers can decide
        how to degrade or retry.
        """
|
|
28
|
+
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# 3) Cloud desktop / custom device backend using Official Lybic Python SDK
|
|
3
|
+
# https://lybic.ai/docs/sdk/python
|
|
4
|
+
# ---------------------------------------------------------------------------
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
import os
|
|
9
|
+
from typing import Dict, Any, Optional
|
|
10
|
+
from PIL import Image
|
|
11
|
+
|
|
12
|
+
from gui_agents.agents.Action import (
|
|
13
|
+
Action,
|
|
14
|
+
Click,
|
|
15
|
+
DoubleClick,
|
|
16
|
+
Move,
|
|
17
|
+
Drag,
|
|
18
|
+
TypeText,
|
|
19
|
+
Scroll,
|
|
20
|
+
Hotkey,
|
|
21
|
+
Wait,
|
|
22
|
+
Screenshot,
|
|
23
|
+
Memorize
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from gui_agents.agents.Backend.Backend import Backend
|
|
27
|
+
|
|
28
|
+
# 导入官方Lybic SDK
|
|
29
|
+
try:
|
|
30
|
+
from lybic import LybicClient, Sandbox, ComputerUse, dto
|
|
31
|
+
except ImportError:
|
|
32
|
+
raise ImportError(
|
|
33
|
+
"Lybic Python SDK not found. Please install it with: pip install --upgrade lybic"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
log = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class LybicBackend(Backend):
    """Backend implemented on top of the official Lybic Python SDK.

    Supports the same Action types as the earlier LybicBackend, but talks to
    the service through the official SDK instead of raw HTTP calls.
    """

    _supported = {Click, DoubleClick, Move, Drag, TypeText, Scroll, Hotkey,
                  Wait, Screenshot, Memorize}

    def __init__(self,
                 api_key: Optional[str] = None,
                 org_id: Optional[str] = None,
                 endpoint: Optional[str] = None,
                 timeout: int = 10,
                 extra_headers: Optional[Dict[str, str]] = None,
                 sandbox_opts: Optional[Dict[str, Any]] = None,
                 max_retries: int = 2,
                 precreate_sid: str = '',
                 **kwargs):
        """Create the SDK client and make sure a sandbox ID is available.

        Args:
            api_key: Lybic API key; falls back to env var LYBIC_API_KEY.
            org_id: Lybic organisation ID; falls back to env var LYBIC_ORG_ID.
            endpoint: API endpoint; falls back to env var LYBIC_API_ENDPOINT,
                then to "https://api.lybic.cn".
            timeout: Request timeout forwarded to ``LybicClient``.
            extra_headers: Extra HTTP headers forwarded to ``LybicClient``.
            sandbox_opts: Extra options used when a sandbox is created. The
                optional "name" entry is used as the sandbox name. The dict
                is copied, never modified.
            max_retries: Number of retries after the first failed attempt.
            precreate_sid: ID of an already created sandbox; falls back to env
                var LYBIC_PRECREATE_SID. When neither is set, a new sandbox is
                created.
            **kwargs: Accepted for call-site compatibility and ignored.

        Raises:
            ValueError: If the API key or organisation ID is missing.
            RuntimeError: If a sandbox was created but no ID could be read
                from the response.
        """
        # A private event loop drives the async SDK from synchronous code.
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self._closed = False

        # Resolve settings, preferring explicit arguments over the environment.
        self.api_key = api_key or os.getenv("LYBIC_API_KEY")
        self.org_id = org_id or os.getenv("LYBIC_ORG_ID")
        self.endpoint = endpoint or os.getenv("LYBIC_API_ENDPOINT", "https://api.lybic.cn")
        self.timeout = timeout
        self.extra_headers = extra_headers
        self.max_retries = max_retries
        self.precreate_sid = precreate_sid or os.getenv("LYBIC_PRECREATE_SID", "")

        # The SDK client can only be built with both credentials present.
        if not (self.api_key and self.org_id):
            raise ValueError("LYBIC_API_KEY and LYBIC_ORG_ID are required. Please set them as environment variables or pass them as arguments.")

        self.client = LybicClient(
            org_id=self.org_id,
            api_key=self.api_key,
            endpoint=self.endpoint,
            timeout=self.timeout,
            extra_headers=self.extra_headers or {}
        )

        # SDK components
        self.sandbox_manager = Sandbox(self.client)
        self.computer_use = ComputerUse(self.client)

        # Sandbox ID
        self.sandbox_id = self.precreate_sid

        # ``precreate_sid`` is always a string (possibly empty), never None,
        # so emptiness is what signals "no pre-created sandbox".
        if not self.sandbox_id:
            print("Creating sandbox using official SDK...")
            max_life_seconds = int(os.getenv("LYBIC_MAX_LIFE_SECONDS", "3600"))
            # Work on a copy so the caller's dict is left untouched.
            create_opts = dict(sandbox_opts or {})
            create_opts.setdefault("maxLifeSeconds", max_life_seconds)
            # Pop the name so it is not passed twice (explicitly and via **).
            sandbox_name = create_opts.pop("name", "agent-run")

            new_sandbox = self.loop.run_until_complete(
                self.sandbox_manager.create(
                    name=sandbox_name,
                    **create_opts
                )
            )
            # Use getattr in case the attribute name differs.
            self.sandbox_id = getattr(new_sandbox, 'id', "") or getattr(new_sandbox, 'sandbox_id', "")
            if not self.sandbox_id:
                raise RuntimeError(f"Failed to get sandbox ID from response: {new_sandbox}")
            print(f"Created sandbox: {self.sandbox_id}")

    def __del__(self):
        """Release the SDK client when the backend is garbage collected."""
        self.close()

    def execute(self, action: Action) -> Any:
        """Execute *action* by translating it into a Lybic SDK call.

        Returns:
            The SDK response for input actions, a PIL image for
            ``Screenshot``, and ``None`` for ``Wait`` and ``Memorize``.

        Raises:
            NotImplementedError: If the action type is not supported.
            RuntimeError: If no sandbox ID is available or the SDK call
                ultimately fails.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} unsupported")
        if not self.sandbox_id:
            raise RuntimeError("Sandbox ID is empty; create a sandbox first (precreate_sid or auto-create).")

        if isinstance(action, Click):
            return self._click(action)
        elif isinstance(action, DoubleClick):
            return self._double_click(action)
        elif isinstance(action, Move):
            return self._move(action)
        elif isinstance(action, Drag):
            return self._drag(action)
        elif isinstance(action, TypeText):
            return self._type(action)
        elif isinstance(action, Scroll):
            return self._scroll(action)
        elif isinstance(action, Hotkey):
            return self._hotkey(action)
        elif isinstance(action, Screenshot):
            return self._screenshot()
        elif isinstance(action, Wait):
            duration = action.duration if action.duration is not None else 0.2
            time.sleep(duration)
        elif isinstance(action, Memorize):
            log.info(f"Memorizing information: {action.information}")

    def _execute_with_retry(self, action_dto: dto.ComputerUseActionDto) -> dto.SandboxActionResponseDto:
        """Send *action_dto* to the sandbox, retrying on failure.

        Makes up to ``max_retries + 1`` attempts, backing off between them.

        Raises:
            RuntimeError: If every attempt fails; chained to the last error.
        """
        async def _execute():
            return await self.computer_use.execute_computer_use_action(
                sandbox_id=self.sandbox_id,
                data=action_dto
            )

        exc: Optional[Exception] = None
        total_attempts = self.max_retries + 1
        for attempt in range(1, total_attempts + 1):
            try:
                return self.loop.run_until_complete(_execute())
            except Exception as e:
                exc = e
                log.warning(f"Lybic SDK action failed (try {attempt}/{self.max_retries+1}): {e}")
                # Back off before the next attempt; no point sleeping after
                # the last one.
                if attempt < total_attempts:
                    time.sleep(0.4 * attempt)

        raise RuntimeError(f"Lybic SDK action failed after {self.max_retries + 1} attempts: {exc}") from exc

    @staticmethod
    def _hold_key(act: Action) -> str:
        """Return the action's held keys as one space-separated string."""
        return " ".join(act.holdKey) if act.holdKey else ""

    def _run_action(self, sdk_action: Any) -> dto.SandboxActionResponseDto:
        """Wrap *sdk_action* in a request DTO and execute it with retries."""
        action_dto = dto.ComputerUseActionDto(
            action=sdk_action,
            includeScreenShot=False,
            includeCursorPosition=False
        )
        return self._execute_with_retry(action_dto)

    def _click(self, act: Click) -> dto.SandboxActionResponseDto:
        """Perform a mouse click."""
        click_action = dto.MouseClickAction(
            type="mouse:click",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            button=1 if act.button == 0 else 2,  # 0=left, 1=right -> 1=left, 2=right
            holdKey=self._hold_key(act)
        )
        return self._run_action(click_action)

    def _double_click(self, act: DoubleClick) -> dto.SandboxActionResponseDto:
        """Perform a mouse double click."""
        double_click_action = dto.MouseDoubleClickAction(
            type="mouse:doubleClick",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            button=1 if act.button == 0 else 2,
            holdKey=self._hold_key(act)
        )
        return self._run_action(double_click_action)

    def _move(self, act: Move) -> dto.SandboxActionResponseDto:
        """Move the mouse pointer."""
        move_action = dto.MouseMoveAction(
            type="mouse:move",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            holdKey=self._hold_key(act)
        )
        return self._run_action(move_action)

    def _drag(self, act: Drag) -> dto.SandboxActionResponseDto:
        """Perform a mouse drag from the start point to the end point."""
        drag_action = dto.MouseDragAction(
            type="mouse:drag",
            startX=dto.PixelLength(type="px", value=act.startX),
            startY=dto.PixelLength(type="px", value=act.startY),
            endX=dto.PixelLength(type="px", value=act.endX),
            endY=dto.PixelLength(type="px", value=act.endY),
            holdKey=self._hold_key(act)
        )
        return self._run_action(drag_action)

    def _type(self, act: TypeText) -> dto.SandboxActionResponseDto:
        """Type the action's text."""
        type_action = dto.KeyboardTypeAction(
            type="keyboard:type",
            content=act.text,
            treatNewLineAsEnter=True  # newlines are sent as Enter by default
        )
        return self._run_action(type_action)

    def _scroll(self, act: Scroll) -> dto.SandboxActionResponseDto:
        """Scroll at the action's position; missing step values count as 0."""
        step_vertical = act.stepVertical if act.stepVertical is not None else 0
        step_horizontal = act.stepHorizontal if act.stepHorizontal is not None else 0

        scroll_action = dto.MouseScrollAction(
            type="mouse:scroll",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            stepVertical=step_vertical,
            stepHorizontal=step_horizontal,
            holdKey=self._hold_key(act)
        )
        return self._run_action(scroll_action)

    def _hotkey(self, act: Hotkey) -> dto.SandboxActionResponseDto:
        """Press a hotkey combination.

        Raises:
            ValueError: If ``act.duration`` is set but outside 1..5000.
        """
        duration = 80  # default when the action does not specify one
        if act.duration is not None:
            if 1 <= act.duration <= 5000:
                duration = act.duration
            else:
                raise ValueError("Hotkey duration must be between 1 and 5000")

        # The key list is sent as one lower-cased, space-separated string.
        keys_str = " ".join(act.keys).lower()

        hotkey_action = dto.KeyboardHotkeyAction(
            type="keyboard:hotkey",
            keys=keys_str,
            duration=duration
        )
        return self._run_action(hotkey_action)

    def _screenshot(self) -> Image.Image:
        """Fetch a screenshot of the sandbox via the SDK.

        Returns:
            The image element of the tuple returned by ``get_screenshot``.

        Raises:
            RuntimeError: If the SDK call fails; chained to the cause.
        """
        async def _get_screenshot():
            return await self.sandbox_manager.get_screenshot(self.sandbox_id)

        try:
            url, image, b64_str = self.loop.run_until_complete(_get_screenshot())

            # Only the image is returned, for compatibility with the earlier
            # LybicBackend; the URL and base64 string are dropped.
            return image

        except Exception as e:
            raise RuntimeError(f"Failed to take screenshot: {e}") from e

    def get_sandbox_id(self) -> str:
        """Return the current sandbox ID.

        Raises:
            RuntimeError: If no sandbox ID is set (the ID is an empty string
                rather than None in that case).
        """
        if not self.sandbox_id:
            raise RuntimeError("Sandbox ID is not available")
        return self.sandbox_id

    def close(self):
        """Close the SDK client connection (best effort, safe to call twice)."""
        # ``__init__`` may have failed before the client existed, and
        # ``__del__`` calls this again after an explicit ``close()``.
        if getattr(self, "_closed", False) or not hasattr(self, "client"):
            return
        self._closed = True
        try:
            self.loop.run_until_complete(self.client.close())
        except Exception as e:
            log.warning(f"Error closing Lybic client: {e}")
|