lybic-guiagents 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

gui_agents/__init__.py CHANGED
@@ -0,0 +1,67 @@
1
+ """
2
+ GUI Agents - A comprehensive GUI automation framework
3
+
4
+ This package provides both low-level agent components and a high-level service interface
5
+ for GUI automation tasks across different platforms and backends.
6
+
7
+ Main Components:
8
+ - AgentService: High-level service interface (recommended for most users)
9
+ - AgentS2, AgentSFast: Core agent implementations
10
+ - HardwareInterface: Hardware abstraction layer
11
+ - ServiceConfig: Configuration management
12
+
13
+ Quick Start:
14
+ from gui_agents import AgentService
15
+
16
+ service = AgentService()
17
+ result = service.execute_task("Take a screenshot")
18
+ print(f"Task completed: {result.status}")
19
+ """
20
+
21
+ # High-level service interface (recommended)
22
+ from .service import (
23
+ AgentService,
24
+ ServiceConfig,
25
+ TaskRequest,
26
+ TaskResult,
27
+ TaskStatus,
28
+ ExecutionStats,
29
+ AgentServiceError,
30
+ ConfigurationError,
31
+ TaskExecutionError
32
+ )
33
+
34
+ # Core agent classes (for advanced users)
35
+ from .agents.agent_s import AgentS2, AgentSFast
36
+ from .agents.hardware_interface import HardwareInterface
37
+ from .store.registry import Registry
38
+ from .agents.global_state import GlobalState
39
+
40
# Resolve the installed distribution's version string. Any failure (metadata
# API unavailable, package not installed as a distribution, editable install
# without metadata) falls back to a development placeholder.
try:
    from importlib.metadata import version
except Exception:
    __version__ = "0.0.0+dev"
else:
    try:
        __version__ = version("lybic-guiagents")
    except Exception:
        __version__ = "0.0.0+dev"
45
+
46
+ # Primary exports (what users should typically use)
47
# Public API of the package, grouped by audience.
__all__ = [
    # Service layer: the entry point most callers should use.
    "AgentService", "ServiceConfig", "TaskRequest",
    "TaskResult", "TaskStatus", "ExecutionStats",

    # Exception types raised by the service layer.
    "AgentServiceError", "ConfigurationError", "TaskExecutionError",

    # Lower-level building blocks for advanced usage.
    "AgentS2", "AgentSFast", "HardwareInterface", "Registry", "GlobalState",
]
@@ -0,0 +1,62 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 2) Android device backend (ADB)
3
+ # ---------------------------------------------------------------------------
4
+ from gui_agents.agents.Action import (
5
+ Action,
6
+ Click,
7
+ Drag,
8
+ TypeText,
9
+ Scroll,
10
+ Hotkey,
11
+ Wait,
12
+ )
13
+
14
+ from gui_agents.agents.Backend.Backend import Backend
15
+ import time
16
+ import subprocess
17
+
18
class ADBBackend(Backend):
    """Minimal Android backend that shells out to ``adb shell input ...``.

    Supported actions: Click (tap), Drag (swipe), TypeText (text),
    Hotkey (keyevent for the first key only) and Wait (local sleep).
    """

    _supported = {Click, Drag, TypeText, Hotkey, Wait}

    def __init__(self, serial: str | None = None):
        """Remember which device to target.

        Args:
            serial: Device serial passed to ``adb -s``; ``None`` lets adb
                pick its default device.
        """
        self.serial = serial

    # ------------------------------------------------------------------
    def _shell_prefix(self) -> list[str]:
        """Return the ``adb [-s SERIAL] shell`` argv prefix."""
        prefix = ["adb"]
        if self.serial:
            prefix += ["-s", self.serial]
        prefix.append("shell")
        return prefix

    @staticmethod
    def _wait_seconds(action: Wait) -> float:
        """Return how long a Wait action should sleep, in seconds.

        A ``seconds`` attribute is honoured when present (the attribute this
        backend originally read). Otherwise ``duration`` is used, which is the
        attribute the other backends in this package read.
        """
        seconds = getattr(action, "seconds", None)
        if seconds is not None:
            return seconds
        # NOTE(review): assumes Wait.duration is in milliseconds, matching how
        # PyAutoGUIBackend converts it -- TODO confirm against Action.Wait.
        duration = getattr(action, "duration", None)
        return (duration or 0) * 1e-3

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> None:
        """Translate *action* into an ``adb shell input`` command and run it.

        Raises:
            NotImplementedError: If the action type is not in ``_supported``.
            subprocess.CalledProcessError: If the adb command exits non-zero.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(
                f"{type(action).__name__} not supported by ADBBackend"
            )

        # Wait never touches the device, so handle it before building a command.
        if isinstance(action, Wait):
            time.sleep(self._wait_seconds(action))
            return

        prefix = self._shell_prefix()

        if isinstance(action, Click):
            # A bare "adb shell" would open an interactive shell; always send
            # the tap command with its coordinates.
            cmd = prefix + ["input", "tap", str(action.x), str(action.y)]
        elif isinstance(action, Drag):
            cmd = prefix + [
                "input", "swipe",
                str(action.startX), str(action.startY),
                str(action.endX), str(action.endY),
            ]
        elif isinstance(action, TypeText):
            text = action.text.replace(" ", "%s")  # escape spaces for `input text`
            cmd = prefix + ["input", "text", text]
        elif isinstance(action, Hotkey):
            # Only the first key is sent; key combinations are not mapped.
            key = action.keys[0].upper()
            cmd = prefix + ["input", "keyevent", key]
        else:
            raise NotImplementedError(f"Unhandled action: {action}")

        subprocess.run(cmd, check=True)
@@ -0,0 +1,28 @@
1
+ # Abstract backend base‑class
2
+ # ---------------------------------------------------------------------------
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, List, Type, Dict, Set
5
+ from gui_agents.agents.Action import (
6
+ Action
7
+ )
8
+
9
+
10
class Backend(ABC):
    """Common interface every platform backend implements.

    A concrete backend lists the Action subclasses it can run in
    ``_supported`` and implements :meth:`execute` to carry them out.
    """

    # Action subclasses this backend can execute; empty on the base class.
    _supported: Set[Type[Action]] = set()

    # ---------------------------------------------------------------------
    def supports(self, action_type: Type[Action]) -> bool:
        """Return True when *action_type* itself is listed in ``_supported``.

        This is a plain membership test on the class object; subclasses of a
        supported type are not matched unless they are listed as well.
        """
        handled = self._supported
        return action_type in handled

    # ---------------------------------------------------------------------
    @abstractmethod
    def execute(self, action: Action) -> Any:
        """Carry out *action* using backend-specific commands.

        Implementations should raise **NotImplementedError** when the type of
        *action* is not in ``self._supported`` so that callers can decide how
        to degrade or retry.
        """
28
+
@@ -0,0 +1,355 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 3) Cloud desktop / custom device backend using Official Lybic Python SDK
3
+ # https://lybic.ai/docs/sdk/python
4
+ # ---------------------------------------------------------------------------
5
+ import asyncio
6
+ import logging
7
+ import time
8
+ import os
9
+ from typing import Dict, Any, Optional, Union
10
+ from io import BytesIO
11
+ from PIL import Image
12
+
13
+ from gui_agents.agents.Action import (
14
+ Action,
15
+ Click,
16
+ DoubleClick,
17
+ Move,
18
+ Drag,
19
+ TypeText,
20
+ Scroll,
21
+ Hotkey,
22
+ Wait,
23
+ Screenshot,
24
+ Memorize
25
+ )
26
+
27
+ from gui_agents.agents.Backend.Backend import Backend
28
+
29
+ # Import the official Lybic SDK
30
+ try:
31
+ from lybic import LybicClient, Sandbox, ComputerUse, dto
32
+ except ImportError:
33
+ raise ImportError(
34
+ "Lybic Python SDK not found. Please install it with: pip install --upgrade lybic"
35
+ )
36
+
37
+
38
+ log = logging.getLogger(__name__)
39
+
40
+
41
class LybicBackend(Backend):
    """Backend that drives a Lybic sandbox through the official Lybic Python SDK.

    Supports the same Action types as the earlier LybicBackend, but goes
    through the official SDK instead of hand-written HTTP calls. The SDK is
    asynchronous; this class owns a private event loop and blocks on it so the
    public methods stay synchronous.
    """

    _supported = {Click, DoubleClick, Move, Drag, TypeText, Scroll, Hotkey,
                  Wait, Screenshot, Memorize}

    def __init__(self,
                 api_key: Optional[str] = None,
                 org_id: Optional[str] = None,
                 endpoint: Optional[str] = None,
                 timeout: int = 10,
                 extra_headers: Optional[Dict[str, str]] = None,
                 sandbox_opts: Optional[Dict[str, Any]] = None,
                 max_retries: int = 2,
                 precreate_sid: str = '',
                 **kwargs):
        """Create the SDK client and attach to (or create) a sandbox.

        Args:
            api_key: Lybic API key; falls back to env var LYBIC_API_KEY.
            org_id: Lybic organization ID; falls back to env var LYBIC_ORG_ID.
            endpoint: API endpoint; falls back to env var LYBIC_API_ENDPOINT,
                then to "https://api.lybic.cn".
            timeout: API request timeout passed to the SDK client.
            extra_headers: Additional HTTP headers passed to the SDK client.
            sandbox_opts: Extra options used when creating a sandbox. The
                dict is copied, never modified in place.
            max_retries: Number of retries after the first failed attempt.
            precreate_sid: ID of an existing sandbox; falls back to env var
                LYBIC_PRECREATE_SID. When set, no new sandbox is created.
            **kwargs: Accepted for call-site compatibility and ignored.

        Raises:
            ValueError: If the API key or organization ID is missing.
            RuntimeError: If a sandbox was created but no ID could be read
                from the response.
        """
        # Private loop used to run the SDK coroutines synchronously.
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)

        # Resolve configuration, preferring explicit arguments over env vars.
        self.api_key = api_key or os.getenv("LYBIC_API_KEY")
        self.org_id = org_id or os.getenv("LYBIC_ORG_ID")
        self.endpoint = endpoint or os.getenv("LYBIC_API_ENDPOINT", "https://api.lybic.cn")
        self.timeout = timeout
        self.extra_headers = extra_headers
        self.max_retries = max_retries
        self.precreate_sid = precreate_sid or os.getenv("LYBIC_PRECREATE_SID", "")

        # The SDK client cannot be built without credentials.
        if not (self.api_key and self.org_id):
            raise ValueError("LYBIC_API_KEY and LYBIC_ORG_ID are required. Please set them as environment variables or pass them as arguments.")
        self.client = LybicClient(
            org_id=self.org_id,
            api_key=self.api_key,
            endpoint=self.endpoint,
            timeout=self.timeout,
            extra_headers=self.extra_headers or {}
        )

        # SDK components built on top of the client.
        self.sandbox_manager = Sandbox(self.client)
        self.computer_use = ComputerUse(self.client)

        # Sandbox ID; an empty string means "none supplied".
        self.sandbox_id = self.precreate_sid

        # precreate_sid is always a string (possibly empty), never None, so the
        # check must be on emptiness for auto-creation to ever run.
        if not self.sandbox_id:
            print("Creating sandbox using official SDK...")
            max_life_seconds = int(os.getenv("LYBIC_MAX_LIFE_SECONDS", "3600"))
            # Work on a copy so the caller's dict is left untouched.
            create_opts = dict(sandbox_opts or {})
            create_opts.setdefault("maxLifeSeconds", max_life_seconds)
            # Pull the name out so it is not passed twice (explicitly and via
            # **create_opts), which would raise TypeError.
            sandbox_name = create_opts.pop("name", "agent-run")

            new_sandbox = self.loop.run_until_complete(
                self.sandbox_manager.create(name=sandbox_name, **create_opts)
            )
            # Use getattr in case the response exposes the ID under another name.
            self.sandbox_id = getattr(new_sandbox, 'id', "") or getattr(new_sandbox, 'sandbox_id', "")
            if not self.sandbox_id:
                raise RuntimeError(f"Failed to get sandbox ID from response: {new_sandbox}")
            print(f"Created sandbox: {self.sandbox_id}")

    def __del__(self):
        """Best-effort cleanup: close the SDK client if one was created."""
        try:
            if hasattr(self, 'client'):
                self.loop.run_until_complete(self.client.close())
        except Exception as e:
            log.warning(f"Error closing Lybic client: {e}")

    def execute(self, action: Action) -> Any:
        """Execute *action* by translating it into a Lybic SDK call.

        Returns:
            The SDK response for input actions, the image for Screenshot,
            and None for Wait and Memorize.

        Raises:
            NotImplementedError: If the action type is not supported.
            RuntimeError: If no sandbox ID is available.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} unsupported")
        if not self.sandbox_id:
            raise RuntimeError("Sandbox ID is empty; create a sandbox first (precreate_sid or auto-create).")

        if isinstance(action, Click):
            return self._click(action)
        elif isinstance(action, DoubleClick):
            return self._double_click(action)
        elif isinstance(action, Move):
            return self._move(action)
        elif isinstance(action, Drag):
            return self._drag(action)
        elif isinstance(action, TypeText):
            return self._type(action)
        elif isinstance(action, Scroll):
            return self._scroll(action)
        elif isinstance(action, Hotkey):
            return self._hotkey(action)
        elif isinstance(action, Screenshot):
            return self._screenshot()
        elif isinstance(action, Wait):
            # NOTE(review): the value is passed to time.sleep() unscaled, i.e.
            # treated as seconds here -- confirm the unit of Wait.duration.
            duration = action.duration if action.duration is not None else 0.2
            time.sleep(duration)
        elif isinstance(action, Memorize):
            # Nothing is sent to the sandbox; the information is only logged.
            log.info(f"Memorizing information: {action.information}")

    def _execute_with_retry(self, action_dto: dto.ComputerUseActionDto) -> dto.SandboxActionResponseDto:
        """Send *action_dto* to the sandbox, retrying on any exception.

        Makes up to ``max_retries + 1`` attempts with a linearly growing pause
        between attempts.

        Raises:
            RuntimeError: If every attempt failed; chained to the last error.
        """
        async def _execute():
            return await self.computer_use.execute_computer_use_action(
                sandbox_id=self.sandbox_id,
                data=action_dto
            )

        total_attempts = self.max_retries + 1
        exc: Optional[Exception] = None
        for attempt in range(1, total_attempts + 1):
            try:
                return self.loop.run_until_complete(_execute())
            except Exception as e:
                exc = e
                log.warning(f"Lybic SDK action failed (try {attempt}/{self.max_retries+1}): {e}")
                # Back off before the next try; no point waiting after the last one.
                if attempt < total_attempts:
                    time.sleep(0.4 * attempt)

        raise RuntimeError(f"Lybic SDK action failed after {self.max_retries + 1} attempts: {exc}") from exc

    @staticmethod
    def _hold_keys(keys) -> str:
        """Join held-key names with spaces; empty string when there are none."""
        return " ".join(keys) if keys else ""

    def _submit(self, sdk_action) -> dto.SandboxActionResponseDto:
        """Wrap an SDK action in a ComputerUseActionDto and execute it with retry.

        Screenshot and cursor position are not requested in the response.
        """
        action_dto = dto.ComputerUseActionDto(
            action=sdk_action,
            includeScreenShot=False,
            includeCursorPosition=False
        )
        return self._execute_with_retry(action_dto)

    def _click(self, act: Click) -> dto.SandboxActionResponseDto:
        """Perform a single mouse click."""
        # Mapping as written: act.button 0 -> SDK button 1 (left), anything
        # else -> SDK button 2 (right).
        # NOTE(review): PyAutoGUIBackend in this package treats act.button 1 as
        # left and 2 as right -- confirm which convention Click.button follows.
        return self._submit(dto.MouseClickAction(
            type="mouse:click",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            button=1 if act.button == 0 else 2,
            holdKey=self._hold_keys(act.holdKey)
        ))

    def _double_click(self, act: DoubleClick) -> dto.SandboxActionResponseDto:
        """Perform a mouse double-click (same button mapping as ``_click``)."""
        return self._submit(dto.MouseDoubleClickAction(
            type="mouse:doubleClick",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            button=1 if act.button == 0 else 2,
            holdKey=self._hold_keys(act.holdKey)
        ))

    def _move(self, act: Move) -> dto.SandboxActionResponseDto:
        """Move the mouse pointer."""
        return self._submit(dto.MouseMoveAction(
            type="mouse:move",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            holdKey=self._hold_keys(act.holdKey)
        ))

    def _drag(self, act: Drag) -> dto.SandboxActionResponseDto:
        """Drag the mouse from the start point to the end point."""
        return self._submit(dto.MouseDragAction(
            type="mouse:drag",
            startX=dto.PixelLength(type="px", value=act.startX),
            startY=dto.PixelLength(type="px", value=act.startY),
            endX=dto.PixelLength(type="px", value=act.endX),
            endY=dto.PixelLength(type="px", value=act.endY),
            holdKey=self._hold_keys(act.holdKey)
        ))

    def _type(self, act: TypeText) -> dto.SandboxActionResponseDto:
        """Type text into the sandbox."""
        return self._submit(dto.KeyboardTypeAction(
            type="keyboard:type",
            content=act.text,
            treatNewLineAsEnter=True  # newlines are sent as Enter key presses
        ))

    def _scroll(self, act: Scroll) -> dto.SandboxActionResponseDto:
        """Scroll at the given position; a missing step on either axis is sent as 0."""
        step_vertical = act.stepVertical if act.stepVertical is not None else 0
        step_horizontal = act.stepHorizontal if act.stepHorizontal is not None else 0

        return self._submit(dto.MouseScrollAction(
            type="mouse:scroll",
            x=dto.PixelLength(type="px", value=act.x),
            y=dto.PixelLength(type="px", value=act.y),
            stepVertical=step_vertical,
            stepHorizontal=step_horizontal,
            holdKey=self._hold_keys(act.holdKey)
        ))

    def _hotkey(self, act: Hotkey) -> dto.SandboxActionResponseDto:
        """Press a key combination.

        Raises:
            ValueError: If ``act.duration`` is given but outside 1..5000.
        """
        duration = 80  # default when the action does not specify one
        if act.duration is not None:
            if 1 <= act.duration <= 5000:
                duration = act.duration
            else:
                raise ValueError("Hotkey duration must be between 1 and 5000")

        # The key list is sent as one space-separated, lower-cased string
        # (per the SDK documentation, according to the original author).
        keys_str = " ".join(act.keys).lower()

        return self._submit(dto.KeyboardHotkeyAction(
            type="keyboard:hotkey",
            keys=keys_str,
            duration=duration
        ))

    def _screenshot(self) -> Image.Image:
        """Fetch a screenshot of the sandbox via the SDK's ``get_screenshot``.

        Only the image element of the SDK's (url, image, base64) result is
        returned, for compatibility with the earlier LybicBackend. Cursor
        information, if needed, has to be obtained some other way.

        Raises:
            RuntimeError: If the SDK call fails; chained to the original error.
        """
        async def _get_screenshot():
            return await self.sandbox_manager.get_screenshot(self.sandbox_id)

        try:
            url, image, b64_str = self.loop.run_until_complete(_get_screenshot())
            return image
        except Exception as e:
            raise RuntimeError(f"Failed to take screenshot: {e}") from e

    def get_sandbox_id(self) -> str:
        """Return the current sandbox ID.

        Raises:
            RuntimeError: If no sandbox ID is set (None or empty).
        """
        if not self.sandbox_id:
            raise RuntimeError("Sandbox ID is not available")
        return self.sandbox_id

    def close(self):
        """Close the SDK client connection; failures are logged, not raised."""
        try:
            self.loop.run_until_complete(self.client.close())
        except Exception as e:
            log.warning(f"Error closing Lybic client: {e}")
@@ -0,0 +1,186 @@
1
+ # ---------------------------------------------------------------------------
2
+ # 1) Desktop automation backend (PyAutoGUI)
3
+ # ---------------------------------------------------------------------------
4
+ import os
5
+ import subprocess, difflib
6
+ import sys
7
+ import pyperclip
8
+ from PIL import Image
9
+ from numpy import imag
10
+ from gui_agents.agents.Action import (
11
+ Action,
12
+ Click,
13
+ DoubleClick,
14
+ Move,
15
+ Scroll,
16
+ Drag,
17
+ TypeText,
18
+ Hotkey,
19
+ Wait,
20
+ Screenshot
21
+ )
22
+
23
+ from gui_agents.agents.Backend.Backend import Backend
24
+ import time
25
+
26
+
27
class PyAutoGUIBackend(Backend):
    """Pure local desktop backend powered by *pyautogui*.

    Pros : zero dependency besides Python & pyautogui.
    Cons : Requires an active, visible desktop session (won't work headless).
    """

    _supported = {Click, DoubleClick, Move, Scroll, Drag, TypeText, Hotkey, Wait, Screenshot}

    # Action.button code -> pyautogui button name; any other code uses 'primary'.
    _BUTTON_NAMES = {1: "left", 4: "middle", 2: "right"}

    def __init__(self, default_move_duration: float = 0.0, platform: str | None = None):
        """Load pyautogui and record the settings used by later actions.

        Args:
            default_move_duration: Pointer travel time passed to click calls.
            platform: Platform identifier; defaults to ``sys.platform``.
        """
        import pyautogui as pag  # local import to avoid hard requirement
        # PyAutoGUI sometimes throws exceptions if mouse is moved to a corner.
        pag.FAILSAFE = False
        self.pag = pag
        self.default_move_duration = default_move_duration
        # Stored lower-cased; used to choose the paste shortcut in _type.
        self.platform = (platform or sys.platform).lower()

    # ------------------------------------------------------------------
    def execute(self, action: Action) -> None:
        """Run *action* on the local desktop.

        Returns:
            The screenshot image for Screenshot actions, otherwise None.

        Raises:
            NotImplementedError: If the action type is not supported.
        """
        if not self.supports(type(action)):
            raise NotImplementedError(f"{type(action).__name__} not supported by PyAutoGUIBackend")

        if isinstance(action, Click):
            self._click(action)
        elif isinstance(action, DoubleClick):
            self._doubleClick(action)
        elif isinstance(action, Move):
            self._move(action)
        elif isinstance(action, Scroll):
            self._scroll(action)
        elif isinstance(action, Drag):
            self._drag(action)
        elif isinstance(action, TypeText):
            self._type(action)
        elif isinstance(action, Hotkey):
            self._hotkey(action)
        elif isinstance(action, Screenshot):
            return self._screenshot()  # type: ignore
        elif isinstance(action, Wait):
            # duration is converted from milliseconds; a missing duration
            # means no wait instead of a TypeError.
            duration_ms = action.duration if action.duration is not None else 0
            time.sleep(duration_ms * 1e-3)
        else:
            # This shouldn't happen due to supports() check, but be safe.
            raise NotImplementedError(f"Unhandled action: {action}")

    # ----- individual helpers ------------------------------------------------
    def _run_with_held_keys(self, keys, operation) -> None:
        """Hold *keys* down while calling *operation*, then release them.

        Keys that were pressed are always released, even when a pyautogui
        call raises, so a failure cannot leave a modifier key stuck down.
        """
        pressed = []
        try:
            for key in keys or []:
                self.pag.keyDown(key)
                pressed.append(key)
                time.sleep(0.05)
            operation()
        finally:
            for key in pressed:
                self.pag.keyUp(key)

    def _click_times(self, act, clicks: int) -> None:
        """Click *clicks* times at the action's position with its button."""
        button_str = self._BUTTON_NAMES.get(act.button, 'primary')

        def _do_click() -> None:
            self.pag.click(
                x=act.x,
                y=act.y,
                clicks=clicks,
                button=button_str,  # type: ignore
                duration=self.default_move_duration,
                interval=0.5,
            )

        self._run_with_held_keys(act.holdKey, _do_click)

    def _click(self, act: Click) -> None:
        """Single click, with optional held keys."""
        self._click_times(act, 1)

    def _doubleClick(self, act: DoubleClick) -> None:
        """Double click, with optional held keys."""
        self._click_times(act, 2)

    def _move(self, act: Move) -> None:
        """Move the pointer to (x, y), with optional held keys."""
        self._run_with_held_keys(
            act.holdKey,
            lambda: self.pag.moveTo(x=act.x, y=act.y),
        )

    def _scroll(self, act: Scroll) -> None:
        """Move to (x, y) and scroll by the requested steps.

        Both axes are honoured when both are given. A horizontal step of 0 is
        skipped, since it would scroll nothing.
        """
        self.pag.moveTo(x=act.x, y=act.y)
        if act.stepVertical is not None:
            self.pag.vscroll(act.stepVertical)
        if act.stepHorizontal:
            self.pag.hscroll(act.stepHorizontal)

    def _drag(self, act: Drag) -> None:
        """Press the left button at the start point and release it at the end point."""
        def _do_drag() -> None:
            self.pag.moveTo(x=act.startX, y=act.startY)
            time.sleep(0.1)

            self.pag.mouseDown(button='left')
            try:
                time.sleep(0.2)
                self.pag.moveTo(x=act.endX, y=act.endY, duration=0.5)
                time.sleep(0.1)
            finally:
                # Always release the button so a failed move cannot leave it held.
                self.pag.mouseUp(button='left')

        self._run_with_held_keys(act.holdKey, _do_drag)

    def _type(self, act: TypeText) -> None:
        """Enter text by copying it to the clipboard and pasting it.

        Pasting (rather than typing key by key) is used so that Chinese or
        any other text can be entered. The clipboard content is overwritten.
        """
        pyperclip.copy(act.text)
        time.sleep(0.05)  # let clipboard stabilize

        if self.platform.startswith("darwin"):
            # On macOS the paste keystroke is sent through AppleScript.
            subprocess.run([
                "osascript", "-e",
                'tell application "System Events" to keystroke "v" using command down'
            ])
        else:  # Windows / Linux
            self.pag.hotkey("ctrl", "v", interval=0.05)

    def _hotkey(self, act: Hotkey) -> None:
        """Press a key combination.

        With a duration, keys are pressed one by one with ``duration`` (in
        milliseconds) of sleep after each press, then released in reverse
        order. Without one, pyautogui's ``hotkey`` is used.
        """
        keys = list(act.keys or [])
        if act.duration is not None:
            pressed = []
            try:
                for k in keys:
                    self.pag.keyDown(k)
                    pressed.append(k)
                    time.sleep(act.duration * 1e-3)
            finally:
                # Release whatever was pressed, even if a press failed midway.
                for k in reversed(pressed):
                    self.pag.keyUp(k)
        else:
            self.pag.hotkey(*keys, interval=0.1)

    def _screenshot(self):
        """Return a screenshot taken with ``pyautogui.screenshot()``."""
        return self.pag.screenshot()