oagi-core 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +27 -0
- oagi/agent/default.py +1 -1
- oagi/agent/tasker/taskee_agent.py +1 -1
- oagi/agent/tasker/tasker_agent.py +1 -1
- oagi/cli/agent.py +17 -7
- oagi/client/async_.py +6 -1
- oagi/client/base.py +2 -1
- oagi/client/sync.py +8 -1
- oagi/handler/__init__.py +41 -18
- oagi/handler/_ydotool.py +158 -0
- oagi/handler/async_ydotool_action_handler.py +52 -0
- oagi/handler/pil_image.py +6 -0
- oagi/handler/utils.py +21 -0
- oagi/handler/wayland_support.py +219 -0
- oagi/handler/ydotool_action_handler.py +226 -0
- oagi/platform_info.py +51 -0
- {oagi_core-0.12.0.dist-info → oagi_core-0.13.0.dist-info}/METADATA +42 -1
- {oagi_core-0.12.0.dist-info → oagi_core-0.13.0.dist-info}/RECORD +21 -15
- {oagi_core-0.12.0.dist-info → oagi_core-0.13.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.12.0.dist-info → oagi_core-0.13.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.12.0.dist-info → oagi_core-0.13.0.dist-info}/licenses/LICENSE +0 -0
oagi/__init__.py
CHANGED
|
@@ -6,8 +6,11 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
import importlib
|
|
9
|
+
import importlib.metadata
|
|
9
10
|
from typing import TYPE_CHECKING
|
|
10
11
|
|
|
12
|
+
__version__ = importlib.metadata.version("oagi-core")
|
|
13
|
+
|
|
11
14
|
from oagi.actor import Actor, AsyncActor, AsyncShortTask, AsyncTask, ShortTask, Task
|
|
12
15
|
from oagi.client import AsyncClient, SyncClient
|
|
13
16
|
from oagi.exceptions import (
|
|
@@ -62,6 +65,22 @@ _LAZY_IMPORTS_DATA: dict[str, tuple[str, str | None, str | None]] = {
|
|
|
62
65
|
"create_app": ("oagi.server.main", "socketio", "server"),
|
|
63
66
|
"ServerConfig": ("oagi.server.config", "pydantic_settings", "server"),
|
|
64
67
|
"sio": ("oagi.server.socketio_server", "socketio", "server"),
|
|
68
|
+
# Wayland handlers
|
|
69
|
+
"AsyncYdotoolActionHandler": (
|
|
70
|
+
"oagi.handler.async_ydotool_action_handler",
|
|
71
|
+
"screeninfo",
|
|
72
|
+
"desktop",
|
|
73
|
+
),
|
|
74
|
+
"YdotoolActionHandler": (
|
|
75
|
+
"oagi.handler.ydotool_action_handler",
|
|
76
|
+
"screeninfo",
|
|
77
|
+
"desktop",
|
|
78
|
+
),
|
|
79
|
+
"YdotoolConfig": (
|
|
80
|
+
"oagi.handler.ydotool_action_handler",
|
|
81
|
+
"screeninfo",
|
|
82
|
+
"desktop",
|
|
83
|
+
),
|
|
65
84
|
}
|
|
66
85
|
|
|
67
86
|
if TYPE_CHECKING:
|
|
@@ -70,12 +89,14 @@ if TYPE_CHECKING:
|
|
|
70
89
|
from oagi.agent.tasker import TaskerAgent
|
|
71
90
|
from oagi.handler.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
|
|
72
91
|
from oagi.handler.async_screenshot_maker import AsyncScreenshotMaker
|
|
92
|
+
from oagi.handler.async_ydotool_action_handler import AsyncYdotoolActionHandler
|
|
73
93
|
from oagi.handler.pil_image import PILImage
|
|
74
94
|
from oagi.handler.pyautogui_action_handler import (
|
|
75
95
|
PyautoguiActionHandler,
|
|
76
96
|
PyautoguiConfig,
|
|
77
97
|
)
|
|
78
98
|
from oagi.handler.screenshot_maker import ScreenshotMaker
|
|
99
|
+
from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
|
|
79
100
|
from oagi.server.config import ServerConfig
|
|
80
101
|
from oagi.server.main import create_app
|
|
81
102
|
from oagi.server.socketio_server import sio
|
|
@@ -98,6 +119,8 @@ def __dir__() -> list[str]:
|
|
|
98
119
|
|
|
99
120
|
|
|
100
121
|
__all__ = [
|
|
122
|
+
# Version
|
|
123
|
+
"__version__",
|
|
101
124
|
# Core sync classes
|
|
102
125
|
"Actor",
|
|
103
126
|
"AsyncActor",
|
|
@@ -143,4 +166,8 @@ __all__ = [
|
|
|
143
166
|
"create_app",
|
|
144
167
|
"ServerConfig",
|
|
145
168
|
"sio",
|
|
169
|
+
# Lazy imports - Wayland handler classes
|
|
170
|
+
"AsyncYdotoolActionHandler",
|
|
171
|
+
"YdotoolActionHandler",
|
|
172
|
+
"YdotoolConfig",
|
|
146
173
|
]
|
oagi/agent/default.py
CHANGED
|
@@ -16,7 +16,7 @@ from oagi.constants import (
|
|
|
16
16
|
DEFAULT_TEMPERATURE,
|
|
17
17
|
MODEL_ACTOR,
|
|
18
18
|
)
|
|
19
|
-
from oagi.handler import reset_handler
|
|
19
|
+
from oagi.handler.utils import reset_handler
|
|
20
20
|
from oagi.types import AsyncActionHandler, AsyncImageProvider, AsyncObserver, SplitEvent
|
|
21
21
|
|
|
22
22
|
from ..protocol import AsyncAgent
|
oagi/cli/agent.py
CHANGED
|
@@ -206,14 +206,25 @@ def _warn_missing_permissions() -> None:
|
|
|
206
206
|
|
|
207
207
|
def run_agent(args: argparse.Namespace) -> None:
|
|
208
208
|
# Check if desktop extras are installed
|
|
209
|
-
check_optional_dependency("pyautogui", "Agent execution", "desktop")
|
|
210
209
|
check_optional_dependency("PIL", "Agent execution", "desktop")
|
|
211
210
|
|
|
212
|
-
|
|
213
|
-
_warn_missing_permissions()
|
|
214
|
-
|
|
215
|
-
from oagi import AsyncPyautoguiActionHandler, AsyncScreenshotMaker # noqa: PLC0415
|
|
211
|
+
from oagi import AsyncScreenshotMaker # noqa: PLC0415
|
|
216
212
|
from oagi.agent import create_agent # noqa: PLC0415
|
|
213
|
+
from oagi.handler.wayland_support import is_wayland_display_server # noqa: PLC0415
|
|
214
|
+
|
|
215
|
+
# Select appropriate action handler based on display server
|
|
216
|
+
if is_wayland_display_server():
|
|
217
|
+
check_optional_dependency("screeninfo", "Agent execution (Wayland)", "desktop")
|
|
218
|
+
from oagi import AsyncYdotoolActionHandler # noqa: PLC0415
|
|
219
|
+
|
|
220
|
+
action_handler = AsyncYdotoolActionHandler()
|
|
221
|
+
else:
|
|
222
|
+
check_optional_dependency("pyautogui", "Agent execution", "desktop")
|
|
223
|
+
# Warn about missing macOS permissions (non-blocking)
|
|
224
|
+
_warn_missing_permissions()
|
|
225
|
+
from oagi import AsyncPyautoguiActionHandler # noqa: PLC0415
|
|
226
|
+
|
|
227
|
+
action_handler = AsyncPyautoguiActionHandler()
|
|
217
228
|
|
|
218
229
|
# Get configuration
|
|
219
230
|
api_key = args.oagi_api_key or os.getenv("OAGI_API_KEY")
|
|
@@ -266,8 +277,7 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
266
277
|
# Create agent
|
|
267
278
|
agent = create_agent(**agent_kwargs)
|
|
268
279
|
|
|
269
|
-
# Create
|
|
270
|
-
action_handler = AsyncPyautoguiActionHandler()
|
|
280
|
+
# Create image provider
|
|
271
281
|
image_provider = AsyncScreenshotMaker()
|
|
272
282
|
|
|
273
283
|
if args.instruction:
|
oagi/client/async_.py
CHANGED
|
@@ -19,6 +19,7 @@ from ..constants import (
|
|
|
19
19
|
HTTP_CLIENT_TIMEOUT,
|
|
20
20
|
)
|
|
21
21
|
from ..logging import get_logger
|
|
22
|
+
from ..platform_info import get_sdk_headers
|
|
22
23
|
from ..types import Image
|
|
23
24
|
from ..types.models import GenerateResponse, UploadFileResponse, Usage
|
|
24
25
|
from ..types.models.step import Step
|
|
@@ -54,17 +55,21 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
54
55
|
):
|
|
55
56
|
super().__init__(base_url, api_key, max_retries)
|
|
56
57
|
|
|
58
|
+
# Get SDK headers for all clients
|
|
59
|
+
sdk_headers = get_sdk_headers()
|
|
60
|
+
|
|
57
61
|
# OpenAI client for chat completions (with retries)
|
|
58
62
|
self.openai_client = AsyncOpenAI(
|
|
59
63
|
api_key=self.api_key,
|
|
60
64
|
base_url=f"{self.base_url}/v1",
|
|
61
65
|
max_retries=self.max_retries,
|
|
66
|
+
default_headers=sdk_headers,
|
|
62
67
|
)
|
|
63
68
|
|
|
64
69
|
# httpx clients for S3 uploads and other endpoints (with retries)
|
|
65
70
|
transport = AsyncHTTPTransport(retries=self.max_retries)
|
|
66
71
|
self.http_client = httpx.AsyncClient(
|
|
67
|
-
transport=transport, base_url=self.base_url
|
|
72
|
+
transport=transport, base_url=self.base_url, headers=sdk_headers
|
|
68
73
|
)
|
|
69
74
|
self.upload_client = httpx.AsyncClient(
|
|
70
75
|
transport=transport, timeout=HTTP_CLIENT_TIMEOUT
|
oagi/client/base.py
CHANGED
|
@@ -29,6 +29,7 @@ from ..exceptions import (
|
|
|
29
29
|
ValidationError,
|
|
30
30
|
)
|
|
31
31
|
from ..logging import get_logger
|
|
32
|
+
from ..platform_info import get_sdk_headers
|
|
32
33
|
from ..types.models import (
|
|
33
34
|
ErrorResponse,
|
|
34
35
|
GenerateResponse,
|
|
@@ -73,7 +74,7 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
73
74
|
logger.info(f"Client initialized with base_url: {self.base_url}")
|
|
74
75
|
|
|
75
76
|
def _build_headers(self, api_version: str | None = None) -> dict[str, str]:
|
|
76
|
-
headers
|
|
77
|
+
headers = get_sdk_headers()
|
|
77
78
|
if api_version:
|
|
78
79
|
headers["x-api-version"] = api_version
|
|
79
80
|
if self.api_key:
|
oagi/client/sync.py
CHANGED
|
@@ -19,6 +19,7 @@ from ..constants import (
|
|
|
19
19
|
HTTP_CLIENT_TIMEOUT,
|
|
20
20
|
)
|
|
21
21
|
from ..logging import get_logger
|
|
22
|
+
from ..platform_info import get_sdk_headers
|
|
22
23
|
from ..types import Image
|
|
23
24
|
from ..types.models import GenerateResponse, UploadFileResponse, Usage
|
|
24
25
|
from ..types.models.step import Step
|
|
@@ -54,16 +55,22 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
54
55
|
):
|
|
55
56
|
super().__init__(base_url, api_key, max_retries)
|
|
56
57
|
|
|
58
|
+
# Get SDK headers for all clients
|
|
59
|
+
sdk_headers = get_sdk_headers()
|
|
60
|
+
|
|
57
61
|
# OpenAI client for chat completions (with retries)
|
|
58
62
|
self.openai_client = OpenAI(
|
|
59
63
|
api_key=self.api_key,
|
|
60
64
|
base_url=f"{self.base_url}/v1",
|
|
61
65
|
max_retries=self.max_retries,
|
|
66
|
+
default_headers=sdk_headers,
|
|
62
67
|
)
|
|
63
68
|
|
|
64
69
|
# httpx clients for S3 uploads and other endpoints (with retries)
|
|
65
70
|
transport = HTTPTransport(retries=self.max_retries)
|
|
66
|
-
self.http_client = httpx.Client(
|
|
71
|
+
self.http_client = httpx.Client(
|
|
72
|
+
transport=transport, base_url=self.base_url, headers=sdk_headers
|
|
73
|
+
)
|
|
67
74
|
self.upload_client = httpx.Client(
|
|
68
75
|
transport=transport, timeout=HTTP_CLIENT_TIMEOUT
|
|
69
76
|
)
|
oagi/handler/__init__.py
CHANGED
|
@@ -5,28 +5,48 @@
|
|
|
5
5
|
# This file is part of the official API project.
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
|
-
|
|
9
|
-
from
|
|
10
|
-
from oagi.handler.pil_image import PILImage
|
|
11
|
-
from oagi.handler.pyautogui_action_handler import (
|
|
12
|
-
PyautoguiActionHandler,
|
|
13
|
-
PyautoguiConfig,
|
|
14
|
-
)
|
|
15
|
-
from oagi.handler.screenshot_maker import ScreenshotMaker
|
|
8
|
+
import importlib
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
16
10
|
|
|
11
|
+
from .utils import reset_handler
|
|
17
12
|
|
|
18
|
-
|
|
19
|
-
|
|
13
|
+
# Lazy imports for pyautogui-dependent modules to avoid import errors on headless systems
|
|
14
|
+
_LAZY_IMPORTS: dict[str, str] = {
|
|
15
|
+
"AsyncPyautoguiActionHandler": "oagi.handler.async_pyautogui_action_handler",
|
|
16
|
+
"AsyncScreenshotMaker": "oagi.handler.async_screenshot_maker",
|
|
17
|
+
"PILImage": "oagi.handler.pil_image",
|
|
18
|
+
"PyautoguiActionHandler": "oagi.handler.pyautogui_action_handler",
|
|
19
|
+
"PyautoguiConfig": "oagi.handler.pyautogui_action_handler",
|
|
20
|
+
"ScreenshotMaker": "oagi.handler.screenshot_maker",
|
|
21
|
+
"AsyncYdotoolActionHandler": "oagi.handler.async_ydotool_action_handler",
|
|
22
|
+
"YdotoolActionHandler": "oagi.handler.ydotool_action_handler",
|
|
23
|
+
"YdotoolConfig": "oagi.handler.ydotool_action_handler",
|
|
24
|
+
}
|
|
20
25
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from oagi.handler.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
|
|
28
|
+
from oagi.handler.async_screenshot_maker import AsyncScreenshotMaker
|
|
29
|
+
from oagi.handler.async_ydotool_action_handler import AsyncYdotoolActionHandler
|
|
30
|
+
from oagi.handler.pil_image import PILImage
|
|
31
|
+
from oagi.handler.pyautogui_action_handler import (
|
|
32
|
+
PyautoguiActionHandler,
|
|
33
|
+
PyautoguiConfig,
|
|
34
|
+
)
|
|
35
|
+
from oagi.handler.screenshot_maker import ScreenshotMaker
|
|
36
|
+
from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
|
|
24
37
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"""
|
|
28
|
-
if
|
|
29
|
-
|
|
38
|
+
|
|
39
|
+
def __getattr__(name: str):
    """Resolve a lazily exported handler name on attribute access.

    Names registered in ``_LAZY_IMPORTS`` are fetched from their defining
    submodule, which is imported only at this point. Any other name raises
    ``AttributeError`` exactly like a normal missing module attribute.
    """
    module_path = _LAZY_IMPORTS.get(name)
    if module_path is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getattr(importlib.import_module(module_path), name)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def __dir__() -> list[str]:
    """List the package's public names, lazy exports included, in sorted order."""
    public_names = {*__all__, *_LAZY_IMPORTS}
    return sorted(public_names)
|
|
30
50
|
|
|
31
51
|
|
|
32
52
|
__all__ = [
|
|
@@ -37,4 +57,7 @@ __all__ = [
|
|
|
37
57
|
"ScreenshotMaker",
|
|
38
58
|
"AsyncScreenshotMaker",
|
|
39
59
|
"reset_handler",
|
|
60
|
+
"YdotoolConfig",
|
|
61
|
+
"YdotoolActionHandler",
|
|
62
|
+
"AsyncYdotoolActionHandler",
|
|
40
63
|
]
|
oagi/handler/_ydotool.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
Taken from /usr/include/linux/input-event-codes.h
|
|
10
|
+
|
|
11
|
+
The key names in this mapping follow the key names used by pyautogui; only a subset of pyautogui's keys is covered.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
# Maps pyautogui-style key names (and single characters) to the decimal Linux
# input event codes that ydotool expects. Shifted symbols map to the code of
# the physical key they share with their unshifted counterpart; the table does
# not encode the Shift modifier itself.
KEYCODE_MAP: dict[str, int] = {
    # Letters
    "a": 30,
    "b": 48,
    "c": 46,
    "d": 32,
    "e": 18,
    "f": 33,
    "g": 34,
    "h": 35,
    "i": 23,
    "j": 36,
    "k": 37,
    "l": 38,
    "m": 50,
    "n": 49,
    "o": 24,
    "p": 25,
    "q": 16,
    "r": 19,
    "s": 31,
    "t": 20,
    "u": 22,
    "v": 47,
    "w": 17,
    "x": 45,
    "y": 21,
    "z": 44,
    # Numbers and shifted symbols
    "1": 2,
    "!": 2,
    "2": 3,
    "@": 3,
    "3": 4,
    "#": 4,
    "4": 5,
    "$": 5,
    "5": 6,
    "%": 6,
    "6": 7,
    "^": 7,
    "7": 8,
    "&": 8,
    "8": 9,
    "*": 9,
    "9": 10,
    "(": 10,
    "0": 11,
    ")": 11,
    # Punctuation and symbols
    "-": 12,
    "_": 12,  # KEY_MINUS
    "=": 13,
    "+": 13,  # KEY_EQUAL
    "[": 26,
    "{": 26,  # KEY_LEFTBRACE
    "]": 27,
    "}": 27,  # KEY_RIGHTBRACE
    ";": 39,
    ":": 39,  # KEY_SEMICOLON
    "'": 40,
    '"': 40,  # KEY_APOSTROPHE
    "`": 41,
    "~": 41,  # KEY_GRAVE
    "\\": 43,
    "|": 43,  # KEY_BACKSLASH
    ",": 51,
    "<": 51,  # KEY_COMMA
    ".": 52,
    ">": 52,  # KEY_DOT
    "/": 53,
    "?": 53,  # KEY_SLASH
    # Whitespace
    " ": 57,
    "space": 57,
    "\t": 15,
    "tab": 15,
    # Enter / Backspace / Esc
    "\r": 28,
    "\n": 28,
    "enter": 28,
    "return": 28,
    "backspace": 14,
    "\b": 14,
    "esc": 1,
    "escape": 1,
    # Modifiers
    "shift": 42,
    "shiftleft": 42,
    "shiftright": 54,
    "capslock": 58,
    "ctrl": 29,
    "ctrlleft": 29,
    "ctrlright": 97,
    "alt": 56,
    "altleft": 56,
    "altright": 100,  # KEY_RIGHTALT (same key as "optionright")
    "option": 56,
    "optionleft": 56,
    "optionright": 100,
    "command": 125,  # map to KEY_LEFTMETA
    "win": 125,  # KEY_LEFTMETA
    "winleft": 125,  # KEY_LEFTMETA
    "winright": 126,  # KEY_RIGHTMETA
    "fn": 464,  # KEY_FN (0x1d0)
    # Function keys
    "f1": 59,
    "f2": 60,
    "f3": 61,
    "f4": 62,
    "f5": 63,
    "f6": 64,
    "f7": 65,
    "f8": 66,
    "f9": 67,
    "f10": 68,
    "f11": 87,
    "f12": 88,
    "f13": 183,
    "f14": 184,
    "f15": 185,
    "f16": 186,
    "f17": 187,
    "f18": 188,
    "f19": 189,
    "f20": 190,
    # Navigation
    "home": 102,
    "end": 107,
    "pageup": 104,
    "pgup": 104,
    "pagedown": 109,
    "pgdn": 109,
    "left": 105,
    "right": 106,
    "up": 103,
    "down": 108,
    "insert": 110,  # KEY_INSERT
    "del": 111,
    "delete": 111,
    # Lock / system keys
    "numlock": 69,  # KEY_NUMLOCK
    "scrolllock": 70,  # KEY_SCROLLLOCK
    "printscreen": 99,  # KEY_SYSRQ
    "prtsc": 99,  # KEY_SYSRQ
    "prtscr": 99,  # KEY_SYSRQ
    "prntscrn": 99,  # KEY_SYSRQ
    "pause": 119,  # KEY_PAUSE
    # Media
    "volumeup": 115,
    "volumedown": 114,
    "volumemute": 113,
    # Locale-specific keys
    "yen": 124,  # KEY_YEN
    "eisu": 85,  # mapped to KEY_ZENKAKUHANKAKU (common JIS toggle)
    "kana": 90,  # KEY_KATAKANA
    "help": 138,  # KEY_HELP
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
|
|
11
|
+
from ..types import Action
|
|
12
|
+
from .ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AsyncYdotoolActionHandler:
    """
    Async wrapper for YdotoolActionHandler that runs actions in a thread pool.

    This allows Ydotool operations to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while GUI actions are performed.
    """

    def __init__(self, config: YdotoolConfig | None = None):
        """Initialize with optional configuration.

        Args:
            config: YdotoolConfig instance for customizing behavior; a default
                YdotoolConfig is created when omitted.
        """
        self.config = config or YdotoolConfig()
        # The wrapped synchronous handler shares the same config object.
        self.sync_handler = YdotoolActionHandler(config=self.config)

    def reset(self):
        """Reset handler state.

        Delegates to the underlying synchronous handler's reset method.
        Called at automation start/end and when FINISH action is received.
        """
        self.sync_handler.reset()

    async def __call__(self, actions: list[Action]) -> None:
        """
        Execute actions asynchronously using a thread pool executor.

        This prevents the blocking ydotool calls from stalling the async event
        loop, allowing other coroutines to run while GUI actions are performed.

        Args:
            actions: List of actions to execute
        """
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        # Run the synchronous handler in the default executor to avoid blocking
        await loop.run_in_executor(None, self.sync_handler, actions)
|
oagi/handler/pil_image.py
CHANGED
|
@@ -10,6 +10,8 @@ import io
|
|
|
10
10
|
|
|
11
11
|
from ..exceptions import check_optional_dependency
|
|
12
12
|
from ..types.models.image_config import ImageConfig
|
|
13
|
+
from .wayland_support import is_wayland_display_server
|
|
14
|
+
from .wayland_support import screenshot as wayland_screenshot
|
|
13
15
|
|
|
14
16
|
check_optional_dependency("PIL", "PILImage", "desktop")
|
|
15
17
|
from PIL import Image as PILImageLib # noqa: E402
|
|
@@ -39,6 +41,10 @@ class PILImage:
|
|
|
39
41
|
@classmethod
|
|
40
42
|
def from_screenshot(cls, config: ImageConfig | None = None) -> "PILImage":
|
|
41
43
|
"""Create PILImage from screenshot."""
|
|
44
|
+
# Use flameshot by default in Wayland display environment
|
|
45
|
+
if is_wayland_display_server():
|
|
46
|
+
return cls(wayland_screenshot(), config)
|
|
47
|
+
|
|
42
48
|
# Lazy import to avoid DISPLAY issues in headless environments
|
|
43
49
|
check_optional_dependency("pyautogui", "PILImage.from_screenshot()", "desktop")
|
|
44
50
|
import pyautogui # noqa: PLC0415
|
oagi/handler/utils.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def reset_handler(handler) -> None:
    """Invoke ``handler.reset()`` when the handler provides one.

    Relies on duck-typing rather than a base class: handlers that keep
    internal state (e.g., capslock state) expose ``reset()`` so it can be
    cleared at the start of a new automation task; handlers without the
    attribute are left untouched.

    Args:
        handler: The action handler to reset
    """
    _absent = object()
    reset = getattr(handler, "reset", _absent)
    if reset is _absent:
        return
    reset()
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import shlex
|
|
13
|
+
import shutil
|
|
14
|
+
import subprocess
|
|
15
|
+
import time
|
|
16
|
+
|
|
17
|
+
from screeninfo import get_monitors
|
|
18
|
+
|
|
19
|
+
from ..exceptions import check_optional_dependency
|
|
20
|
+
from ._ydotool import KEYCODE_MAP
|
|
21
|
+
|
|
22
|
+
check_optional_dependency("PIL", "PILImage", "desktop")
|
|
23
|
+
from PIL import Image # noqa: E402
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_wayland_display_server() -> bool:
    """Report whether the ``WAYLAND_DISPLAY`` environment variable is set."""
    return "WAYLAND_DISPLAY" in os.environ
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_screen_size() -> tuple[int, int]:
    """
    Get the screen size in pixels.

    The monitor flagged as primary is preferred; when none is flagged, the
    first monitor reported by ``get_monitors()`` is used.

    :return: ``(width, height)`` of the selected monitor
    :raises RuntimeError: if no monitor is reported at all
    """
    # Enumerate once: every get_monitors() call queries the display backend.
    monitors = get_monitors()
    if not monitors:
        raise RuntimeError("No monitor found, cannot get the screen size info")
    selected = next((m for m in monitors if m.is_primary), monitors[0])
    return selected.width, selected.height
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def screenshot() -> Image.Image:
    """
    Use Flameshot to take a screenshot and return an Image object

    Runs ``flameshot full --region all --raw`` and decodes the bytes written
    to stdout.

    :return: Image object of the screenshot
    :raises RuntimeError: if flameshot is not on PATH or exits with a non-zero status
    """
    # Check if flameshot is installed
    if shutil.which("flameshot") is None:
        raise RuntimeError("flameshot not found. Ensure it is installed and in PATH.")
    cmd = ["flameshot", "full", "--region", "all", "--raw"]
    res = subprocess.run(cmd, capture_output=True)
    if res.returncode != 0:
        raise RuntimeError(
            f"flameshot failed: {shlex.join(cmd)}, stdout: {res.stdout.decode(errors='ignore')}, stderr: {res.stderr.decode(errors='ignore')}"
        )
    im = Image.open(io.BytesIO(res.stdout))
    # Image.open is lazy; decode now so the image no longer depends on the buffer.
    im.load()
    return im
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class Ydotool:
|
|
67
|
+
"""
|
|
68
|
+
Ydotool wrapper for Wayland display server.
|
|
69
|
+
|
|
70
|
+
:param socket_address: The socket address for ydotool, default is empty string.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, socket_address: str = "") -> None:
|
|
74
|
+
# Check if ydotool is installed
|
|
75
|
+
if shutil.which("ydotool") is None:
|
|
76
|
+
raise RuntimeError("ydotool not found. Ensure it is installed and in PATH.")
|
|
77
|
+
# Set default delay between actions
|
|
78
|
+
self.action_pause = 0.5
|
|
79
|
+
# Last action time
|
|
80
|
+
self.last_action_time = 0.0
|
|
81
|
+
# Customize the socket address for ydotool
|
|
82
|
+
self.socket_address = socket_address
|
|
83
|
+
# Check environment issues for ydotool
|
|
84
|
+
self.environ_check()
|
|
85
|
+
|
|
86
|
+
def environ_check(self):
|
|
87
|
+
"""Check environment issues for ydotool"""
|
|
88
|
+
# Check if ydotoold is running
|
|
89
|
+
if not subprocess.run(
|
|
90
|
+
["pgrep", "ydotoold"], capture_output=True, text=True
|
|
91
|
+
).stdout.strip():
|
|
92
|
+
logger.warning("Ydotool daemon (ydotoold) is not running")
|
|
93
|
+
# Check the permission to access the socket address
|
|
94
|
+
socket_address = (
|
|
95
|
+
self.socket_address
|
|
96
|
+
or os.environ.get("YDOTOOL_SOCKET", "")
|
|
97
|
+
or f"/run/user/{os.getuid()}/.ydotool_socket"
|
|
98
|
+
)
|
|
99
|
+
if not os.access(socket_address, os.W_OK) or not os.path.exists(socket_address):
|
|
100
|
+
logger.warning(f"Ydotool cannot connect to socket address:{socket_address}")
|
|
101
|
+
# Check if the mouse acceleration profile is 'flat' (For GNOME)
|
|
102
|
+
accel_profile = subprocess.run(
|
|
103
|
+
[
|
|
104
|
+
"gsettings",
|
|
105
|
+
"get",
|
|
106
|
+
"org.gnome.desktop.peripherals.mouse",
|
|
107
|
+
"accel-profile",
|
|
108
|
+
],
|
|
109
|
+
capture_output=True,
|
|
110
|
+
text=True,
|
|
111
|
+
).stdout.strip()
|
|
112
|
+
if accel_profile and accel_profile != "'flat'":
|
|
113
|
+
logger.warning(
|
|
114
|
+
f"Mouse Acceleration is not disabled, current accel-profile is {accel_profile}). Ydotool may not work as expected."
|
|
115
|
+
+ "Please disable mouse acceleration by running 'gsettings set org.gnome.desktop.peripherals.mouse accel-profile 'flat''",
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
def _get_keycode(self, key_char: str) -> int:
|
|
119
|
+
"""
|
|
120
|
+
Get the keycode from input-event-codes mapping.
|
|
121
|
+
:param key_char: Key char (e.g., "A", "ENTER", "F1", "PRINT_SCREEN", case-insensitive)
|
|
122
|
+
:return: Decimal keycode
|
|
123
|
+
"""
|
|
124
|
+
# Lookup and return keycode
|
|
125
|
+
if key_char in KEYCODE_MAP:
|
|
126
|
+
return KEYCODE_MAP[key_char]
|
|
127
|
+
else:
|
|
128
|
+
return None
|
|
129
|
+
|
|
130
|
+
def _run_ydotool(self, args: list[str], count: int = 1) -> None:
|
|
131
|
+
"""
|
|
132
|
+
Run ydotool command; e.g., ["click", "500", "300"] => ydotool click 500 300
|
|
133
|
+
"""
|
|
134
|
+
if (interval := (time.time() - self.last_action_time)) < self.action_pause:
|
|
135
|
+
time.sleep(interval)
|
|
136
|
+
if count > 1:
|
|
137
|
+
args.extend(["--repeat", str(count)])
|
|
138
|
+
cmd = ["ydotool", *args]
|
|
139
|
+
# Use shlex.join for clear logging
|
|
140
|
+
logger.debug(f"[ydotool] {shlex.join(cmd)}")
|
|
141
|
+
# Env with socket address
|
|
142
|
+
env = os.environ.copy()
|
|
143
|
+
if self.socket_address:
|
|
144
|
+
env["YDOTOOL_SOCKET"] = self.socket_address
|
|
145
|
+
# Run ydotool command
|
|
146
|
+
res = subprocess.run(
|
|
147
|
+
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
|
|
148
|
+
)
|
|
149
|
+
if res.returncode != 0:
|
|
150
|
+
raise RuntimeError(
|
|
151
|
+
f"ydotool failed: {shlex.join(cmd)}, stdout: {res.stdout.decode(errors='ignore').strip()}, stderr: {res.stderr.decode(errors='ignore').strip()}"
|
|
152
|
+
)
|
|
153
|
+
self.last_action_time = time.time()
|
|
154
|
+
|
|
155
|
+
def drag(self, x1: int, y1: int, x2: int, y2: int, count: int = 1) -> None:
|
|
156
|
+
"""
|
|
157
|
+
Drag from (x1, y1) to (x2, y2).
|
|
158
|
+
|
|
159
|
+
"""
|
|
160
|
+
for _ in range(count):
|
|
161
|
+
self.mousemove(x1, y1)
|
|
162
|
+
self._run_ydotool(["click", "0x40"])
|
|
163
|
+
self.mousemove(x2, y2)
|
|
164
|
+
self._run_ydotool(["click", "0x80"])
|
|
165
|
+
|
|
166
|
+
def mousemove(self, x: int, y: int, count: int = 1) -> None:
|
|
167
|
+
"""
|
|
168
|
+
Move mouse to (x, y).
|
|
169
|
+
:param x: X coordinate of the mouse cursor
|
|
170
|
+
:param y: Y coordinate of the mouse cursor
|
|
171
|
+
:param count: Number of mouse move actions to perform
|
|
172
|
+
"""
|
|
173
|
+
self._run_ydotool(
|
|
174
|
+
["mousemove", "--absolute", "-x", str(x), "-y", str(y)], count=count
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
def scroll(self, clicks: float) -> None:
|
|
178
|
+
"""
|
|
179
|
+
Scroll mouse wheel in the given direction.
|
|
180
|
+
:param clicks: Number of clicks to scroll, positive for up, negative for down
|
|
181
|
+
"""
|
|
182
|
+
self._run_ydotool(
|
|
183
|
+
[
|
|
184
|
+
"mousemove",
|
|
185
|
+
"-w",
|
|
186
|
+
"--",
|
|
187
|
+
"0",
|
|
188
|
+
str(clicks),
|
|
189
|
+
],
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
def click(self, x: int, y: int, count: int = 1, right: bool = False) -> None:
|
|
193
|
+
"""
|
|
194
|
+
Click at (x, y).
|
|
195
|
+
"""
|
|
196
|
+
self.mousemove(x, y)
|
|
197
|
+
if right:
|
|
198
|
+
click_key = "0xC1"
|
|
199
|
+
else:
|
|
200
|
+
click_key = "0xC0"
|
|
201
|
+
self._run_ydotool(["click", click_key], count=count)
|
|
202
|
+
|
|
203
|
+
def type(self, text: str, count: int = 1) -> None:
|
|
204
|
+
"""
|
|
205
|
+
Type the given text.
|
|
206
|
+
"""
|
|
207
|
+
self._run_ydotool(["type", text], count=count)
|
|
208
|
+
|
|
209
|
+
def hotkey(self, keys: list[str], count: int = 1) -> None:
|
|
210
|
+
"""
|
|
211
|
+
Press and release the given keys.
|
|
212
|
+
"""
|
|
213
|
+
hotkey_sequences = [
|
|
214
|
+
self._get_keycode(key) for key in keys if self._get_keycode(key) is not None
|
|
215
|
+
]
|
|
216
|
+
command_args = [f"{keycode}:1" for keycode in hotkey_sequences] + [
|
|
217
|
+
f"{keycode}:0" for keycode in hotkey_sequences[::-1]
|
|
218
|
+
]
|
|
219
|
+
self._run_ydotool(["key", *command_args], count=count)
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
|
|
14
|
+
from .capslock_manager import CapsLockManager
|
|
15
|
+
from .wayland_support import Ydotool, get_screen_size
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class YdotoolConfig(BaseModel):
    """Settings model consumed by YdotoolActionHandler."""

    # Wheel distance used for scroll actions.
    scroll_amount: int = Field(
        20,
        description="Amount to scroll (positive for up, negative for down)",
    )
    # Length of a WAIT action.
    wait_duration: float = Field(
        1.0,
        description="Duration for wait actions in seconds",
    )
    # Minimum spacing between consecutive ydotool commands.
    action_pause: float = Field(
        0.5,
        description="Pause between Ydotool actions in seconds",
    )
    # Forwarded to CapsLockManager as its mode.
    capslock_mode: str = Field(
        "session",
        description="Caps lock handling mode: 'session' (internal state) or 'system' (OS-level)",
    )
    # Empty string means no custom socket address is passed to ydotool.
    socket_address: str = Field(
        "",
        description="Custom socket address for ydotool (e.g., '/tmp/ydotool.sock')",
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class YdotoolActionHandler(Ydotool):
|
|
42
|
+
def __init__(self, config: YdotoolConfig | None = None) -> None:
|
|
43
|
+
"""
|
|
44
|
+
Handles actions to be executed using Ydotool.
|
|
45
|
+
|
|
46
|
+
This class provides functionality for handling and executing a sequence of
|
|
47
|
+
actions using the Ydotool. It processes a list of actions and executes
|
|
48
|
+
them as per the implementation.
|
|
49
|
+
|
|
50
|
+
Methods:
|
|
51
|
+
__call__: Executes the provided list of actions.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
actions (list[Action]): List of actions to be processed and executed.
|
|
55
|
+
"""
|
|
56
|
+
# Use default config if none provided
|
|
57
|
+
self.config = config or YdotoolConfig()
|
|
58
|
+
super().__init__(socket_address=self.config.socket_address)
|
|
59
|
+
# Get screen dimensions for coordinate denormalization
|
|
60
|
+
self.screen_width, self.screen_height = get_screen_size()
|
|
61
|
+
# Set default delay between actions
|
|
62
|
+
self.action_pause = self.config.action_pause
|
|
63
|
+
# Initialize caps lock manager
|
|
64
|
+
self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
|
|
65
|
+
|
|
66
|
+
def reset(self):
|
|
67
|
+
"""Reset handler state.
|
|
68
|
+
|
|
69
|
+
Called at automation start/end and when FINISH action is received.
|
|
70
|
+
Resets the internal capslock state.
|
|
71
|
+
"""
|
|
72
|
+
self.caps_manager.reset()
|
|
73
|
+
|
|
74
|
+
def _execute_action(self, action: Action) -> bool:
|
|
75
|
+
"""
|
|
76
|
+
Execute a group of actions and return whether FINISH is reached.
|
|
77
|
+
"""
|
|
78
|
+
finished = False
|
|
79
|
+
arg = (action.argument or "").strip()
|
|
80
|
+
count = int(action.count or 1)
|
|
81
|
+
|
|
82
|
+
match action.type:
|
|
83
|
+
case ActionType.DRAG:
|
|
84
|
+
x1, y1, x2, y2 = self._parse_drag_coords(arg)
|
|
85
|
+
self.drag(x1, y1, x2, y2, count=count)
|
|
86
|
+
|
|
87
|
+
case ActionType.SCROLL:
|
|
88
|
+
x, y, direction = self._parse_scroll(arg)
|
|
89
|
+
scroll_amount = (
|
|
90
|
+
self.config.scroll_amount
|
|
91
|
+
if direction == "up"
|
|
92
|
+
else -self.config.scroll_amount
|
|
93
|
+
)
|
|
94
|
+
self.scroll(scroll_amount)
|
|
95
|
+
|
|
96
|
+
case ActionType.RIGHT_SINGLE:
|
|
97
|
+
x, y = self._parse_coords(arg)
|
|
98
|
+
self.click(x, y, right=True, count=count)
|
|
99
|
+
|
|
100
|
+
case ActionType.LEFT_DOUBLE:
|
|
101
|
+
x, y = self._parse_coords(arg)
|
|
102
|
+
self.click(x, y, count=2 * count)
|
|
103
|
+
|
|
104
|
+
case ActionType.LEFT_TRIPLE:
|
|
105
|
+
x, y = self._parse_coords(arg)
|
|
106
|
+
self.click(x, y, count=3 * count)
|
|
107
|
+
|
|
108
|
+
case ActionType.CLICK:
|
|
109
|
+
x, y = self._parse_coords(arg)
|
|
110
|
+
self.click(x, y, count=count)
|
|
111
|
+
|
|
112
|
+
case ActionType.HOTKEY:
|
|
113
|
+
keys = self._parse_hotkey(arg)
|
|
114
|
+
# Check if this is a caps lock key press
|
|
115
|
+
if len(keys) == 1 and keys[0] == "capslock":
|
|
116
|
+
if self.caps_manager.should_use_system_capslock():
|
|
117
|
+
# System mode: use OS-level caps lock
|
|
118
|
+
self.hotkey(["capslock"])
|
|
119
|
+
else:
|
|
120
|
+
# Session mode: toggle internal state
|
|
121
|
+
self.caps_manager.toggle()
|
|
122
|
+
else:
|
|
123
|
+
# Regular hotkey combination
|
|
124
|
+
self.hotkey(keys, count=count)
|
|
125
|
+
|
|
126
|
+
case ActionType.TYPE:
|
|
127
|
+
# Remove quotes if present
|
|
128
|
+
text = arg.strip("\"'")
|
|
129
|
+
# Apply caps lock transformation if needed
|
|
130
|
+
text = self.caps_manager.transform_text(text)
|
|
131
|
+
self._run_ydotool(["type", text], count=count)
|
|
132
|
+
|
|
133
|
+
case ActionType.FINISH:
|
|
134
|
+
# Task completion - reset handler state
|
|
135
|
+
self.reset()
|
|
136
|
+
|
|
137
|
+
case ActionType.WAIT:
|
|
138
|
+
# Wait for a short period
|
|
139
|
+
time.sleep(self.config.wait_duration)
|
|
140
|
+
|
|
141
|
+
case ActionType.CALL_USER:
|
|
142
|
+
# Call user - implementation depends on requirements
|
|
143
|
+
print("User intervention requested")
|
|
144
|
+
|
|
145
|
+
case _:
|
|
146
|
+
print(f"Unknown action type: {action.type}")
|
|
147
|
+
|
|
148
|
+
return finished
|
|
149
|
+
|
|
150
|
+
def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
|
|
151
|
+
"""Convert coordinates from 0-1000 range to actual screen coordinates.
|
|
152
|
+
|
|
153
|
+
Also handles corner coordinates to prevent PyAutoGUI fail-safe trigger.
|
|
154
|
+
Corner coordinates (0,0), (0,max), (max,0), (max,max) are offset by 1 pixel.
|
|
155
|
+
"""
|
|
156
|
+
screen_x = int(x * self.screen_width / 1000)
|
|
157
|
+
screen_y = int(y * self.screen_height / 1000)
|
|
158
|
+
|
|
159
|
+
# Prevent fail-safe by adjusting corner coordinates
|
|
160
|
+
# Check if coordinates are at screen corners (with small tolerance)
|
|
161
|
+
if screen_x < 1:
|
|
162
|
+
screen_x = 1
|
|
163
|
+
elif screen_x > self.screen_width - 1:
|
|
164
|
+
screen_x = self.screen_width - 1
|
|
165
|
+
|
|
166
|
+
if screen_y < 1:
|
|
167
|
+
screen_y = 1
|
|
168
|
+
elif screen_y > self.screen_height - 1:
|
|
169
|
+
screen_y = self.screen_height - 1
|
|
170
|
+
|
|
171
|
+
return screen_x, screen_y
|
|
172
|
+
|
|
173
|
+
def _normalize_key(self, key: str) -> str:
|
|
174
|
+
"""Normalize key names for consistency."""
|
|
175
|
+
key = key.strip().lower()
|
|
176
|
+
# Normalize caps lock variations
|
|
177
|
+
hotkey_variations_mapping = {
|
|
178
|
+
"capslock": ["caps_lock", "caps", "capslock"],
|
|
179
|
+
"pgup": ["page_up", "pageup"],
|
|
180
|
+
"pgdn": ["page_down", "pagedown"],
|
|
181
|
+
}
|
|
182
|
+
for normalized, variations in hotkey_variations_mapping.items():
|
|
183
|
+
if key in variations:
|
|
184
|
+
return normalized
|
|
185
|
+
return key
|
|
186
|
+
|
|
187
|
+
def _parse_coords(self, args_str: str) -> tuple[int, int]:
|
|
188
|
+
"""Extract x, y coordinates from argument string."""
|
|
189
|
+
coords = parse_coords(args_str)
|
|
190
|
+
if not coords:
|
|
191
|
+
raise ValueError(f"Invalid coordinates format: {args_str}")
|
|
192
|
+
return self._denormalize_coords(coords[0], coords[1])
|
|
193
|
+
|
|
194
|
+
def _parse_drag_coords(self, args_str: str) -> tuple[int, int, int, int]:
|
|
195
|
+
"""Extract x1, y1, x2, y2 coordinates from drag argument string."""
|
|
196
|
+
coords = parse_drag_coords(args_str)
|
|
197
|
+
if not coords:
|
|
198
|
+
raise ValueError(f"Invalid drag coordinates format: {args_str}")
|
|
199
|
+
x1, y1 = self._denormalize_coords(coords[0], coords[1])
|
|
200
|
+
x2, y2 = self._denormalize_coords(coords[2], coords[3])
|
|
201
|
+
return x1, y1, x2, y2
|
|
202
|
+
|
|
203
|
+
def _parse_scroll(self, args_str: str) -> tuple[int, int, str]:
|
|
204
|
+
"""Extract x, y, direction from scroll argument string."""
|
|
205
|
+
result = parse_scroll(args_str)
|
|
206
|
+
if not result:
|
|
207
|
+
raise ValueError(f"Invalid scroll format: {args_str}")
|
|
208
|
+
x, y = self._denormalize_coords(result[0], result[1])
|
|
209
|
+
return x, y, result[2]
|
|
210
|
+
|
|
211
|
+
def _parse_hotkey(self, args_str: str) -> list[str]:
|
|
212
|
+
"""Parse hotkey string into list of keys."""
|
|
213
|
+
# Remove parentheses if present
|
|
214
|
+
args_str = args_str.strip("()")
|
|
215
|
+
# Split by '+' to get individual keys
|
|
216
|
+
keys = [self._normalize_key(key) for key in args_str.split("+")]
|
|
217
|
+
return keys
|
|
218
|
+
|
|
219
|
+
def __call__(self, actions: list[Action]) -> None:
|
|
220
|
+
"""Execute the provided list of actions."""
|
|
221
|
+
for action in actions:
|
|
222
|
+
try:
|
|
223
|
+
self._execute_action(action)
|
|
224
|
+
except Exception as e:
|
|
225
|
+
print(f"Error executing action {action.type}: {e}")
|
|
226
|
+
raise
|
oagi/platform_info.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
"""Platform information and SDK headers for analytics."""
|
|
10
|
+
|
|
11
|
+
import platform
|
|
12
|
+
import sys
|
|
13
|
+
from importlib.metadata import version
|
|
14
|
+
|
|
15
|
+
# SDK identifier: used as the product token in the User-Agent string and as
# the value of the "x-sdk-name" header built further down in this module.
SDK_NAME = "oagi-python"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_sdk_version() -> str:
    """Return the installed ``oagi-core`` version from package metadata.

    Returns:
        The version string, or the literal ``"unknown"`` if the metadata
        lookup raises for any reason.
    """
    try:
        sdk_version = version("oagi-core")
    except Exception:
        # Best-effort lookup: callers always get a string back.
        return "unknown"
    return sdk_version
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_user_agent() -> str:
    """Build User-Agent string.

    The value has the form ``<sdk name>/<sdk version> (python <python
    version>; <sys.platform>; <machine>)``.

    Example: oagi-python/0.12.1 (python 3.11.5; darwin; arm64)
    """
    product_token = f"{SDK_NAME}/{get_sdk_version()}"
    runtime_details = (
        f"python {platform.python_version()}; {sys.platform}; {platform.machine()}"
    )
    return f"{product_token} ({runtime_details})"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_sdk_headers() -> dict[str, str]:
    """Get SDK headers for API requests.

    Returns:
        A new dict containing the ``User-Agent`` header (for debugging) and
        the structured ``x-sdk-*`` headers (for analytics): SDK name and
        version, language and language version, OS, and machine type.
    """
    headers: dict[str, str] = {}
    headers["User-Agent"] = get_user_agent()
    headers["x-sdk-name"] = SDK_NAME
    headers["x-sdk-version"] = get_sdk_version()
    headers["x-sdk-language"] = "python"
    headers["x-sdk-language-version"] = platform.python_version()
    headers["x-sdk-os"] = sys.platform
    headers["x-sdk-platform"] = platform.machine()
    return headers
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: oagi-core
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.13.0
|
|
4
4
|
Summary: Official API of OpenAGI Foundation
|
|
5
5
|
Project-URL: Homepage, https://github.com/agiopen-org/oagi
|
|
6
6
|
Author-email: OpenAGI Foundation <contact@agiopen.org>
|
|
@@ -35,6 +35,7 @@ Requires-Dist: pillow>=9.0.0; extra == 'desktop'
|
|
|
35
35
|
Requires-Dist: pyautogui>=0.9.54; extra == 'desktop'
|
|
36
36
|
Requires-Dist: pyobjc-framework-applicationservices>=8.0; (sys_platform == 'darwin') and extra == 'desktop'
|
|
37
37
|
Requires-Dist: pyobjc-framework-quartz>=8.0; (sys_platform == 'darwin') and extra == 'desktop'
|
|
38
|
+
Requires-Dist: screeninfo>=0.8.1; extra == 'desktop'
|
|
38
39
|
Provides-Extra: server
|
|
39
40
|
Requires-Dist: fastapi[standard]>=0.100.0; extra == 'server'
|
|
40
41
|
Requires-Dist: pydantic-settings>=2.0.0; extra == 'server'
|
|
@@ -82,6 +83,7 @@ With Lux, possibilities are endless. Here are a few examples:
|
|
|
82
83
|
- [Command Line Interface](#command-line-interface)
|
|
83
84
|
- [Image Processing](#image-processing)
|
|
84
85
|
- [Manual Control with Actor](#manual-control-with-actor)
|
|
86
|
+
- [Run On System With Wayland](#run-on-system-with-wayland)
|
|
85
87
|
- [Examples](#examples)
|
|
86
88
|
- [Socket.IO Server (Optional)](#socketio-server-optional)
|
|
87
89
|
- [Installation](#installation-1)
|
|
@@ -239,6 +241,45 @@ async def main():
|
|
|
239
241
|
asyncio.run(main())
|
|
240
242
|
```
|
|
241
243
|
|
|
244
|
+
### Run On System With Wayland
|
|
245
|
+
The SDK supports desktop automation on systems that use the Wayland display server, such as Ubuntu/Debian. It leverages `ydotool` for mouse/keyboard actions and `flameshot` for screenshot capture. Please install both tools on your system in advance and ensure the `ydotoold` daemon is running in the background before you run the script.
|
|
246
|
+
|
|
247
|
+
Refer to [ydotool](https://github.com/ReimuNotMoe/ydotool) and [flameshot](https://flameshot.org/#download) for installation instructions. Disable mouse acceleration for more precise mouse control. (In GNOME, run `gsettings set org.gnome.desktop.peripherals.mouse accel-profile 'flat'`)
|
|
248
|
+
|
|
249
|
+
Run tasks automatically with screenshot capture and action execution:
|
|
250
|
+
```python
|
|
251
|
+
import asyncio
|
|
252
|
+
from oagi import AsyncDefaultAgent, AsyncYdotoolActionHandler, AsyncScreenshotMaker
|
|
253
|
+
|
|
254
|
+
async def main():
|
|
255
|
+
agent = AsyncDefaultAgent(max_steps=10)
|
|
256
|
+
completed = await agent.execute(
|
|
257
|
+
"Search weather on Google",
|
|
258
|
+
action_handler=AsyncYdotoolActionHandler(), # Executes mouse/keyboard actions, based on 'ydotool'
|
|
259
|
+
image_provider=AsyncScreenshotMaker(), # Captures screenshots, based on 'flameshot'
|
|
260
|
+
)
|
|
261
|
+
return completed
|
|
262
|
+
|
|
263
|
+
asyncio.run(main())
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
Configure Ydotool behavior with custom settings:
|
|
267
|
+
|
|
268
|
+
```python
|
|
269
|
+
from oagi import AsyncYdotoolActionHandler, YdotoolConfig
|
|
270
|
+
|
|
271
|
+
# Customize action behavior
|
|
272
|
+
config = YdotoolConfig(
|
|
273
|
+
scroll_amount=50, # Larger scroll steps (default: 20)
|
|
274
|
+
wait_duration=2.0, # Longer waits (default: 1.0)
|
|
275
|
+
action_pause=1.0, # More pause between actions (default: 0.5)
|
|
276
|
+
capslock_mode="session", # Caps lock mode: 'session' or 'system' (default: 'session')
|
|
277
|
+
socket_address="/tmp/ydotool.sock" # Customized Socket address for ydotool (ydotool uses 'YDOTOOL_SOCKET' environment variable by default)
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
action_handler = AsyncYdotoolActionHandler(config=config)
|
|
281
|
+
```
|
|
282
|
+
|
|
242
283
|
## Examples
|
|
243
284
|
|
|
244
285
|
See the [`examples/`](examples/) directory for more usage patterns:
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
oagi/__init__.py,sha256=
|
|
1
|
+
oagi/__init__.py,sha256=ly956qE0F4FlzaU-kPsKAhpG2Kc3D9T_BJDZmWqioOc,5616
|
|
2
2
|
oagi/constants.py,sha256=ywyMimjh15tC5p4MBZjkJIqOVElPRS50iJtrPE3VClw,1211
|
|
3
3
|
oagi/exceptions.py,sha256=Rco37GQTPYUfc2vRO3hozxPF_s8mKFDpFvBg2UKWo3Y,3066
|
|
4
4
|
oagi/logging.py,sha256=YT3KCMFj5fzO98R9xlDDgfSotUuz1xRD6OZeYM2rKoo,1760
|
|
5
|
+
oagi/platform_info.py,sha256=GEqNWnwePszVEM21toGGi07o3PaX8O059CYRr0RUM_M,1424
|
|
5
6
|
oagi/actor/__init__.py,sha256=g_8_7ZLDLKuCGzyrB42OzY3gSOjd_SxzkJW3_pf-PXs,662
|
|
6
7
|
oagi/actor/async_.py,sha256=DcU6ifAcrYI1GSrTFgMPrWhikjmnJfZVaFsun15DG_k,3768
|
|
7
8
|
oagi/actor/async_short.py,sha256=QSo67aPsd_rxA2J2TR1fv6nJD6wO00q3S-QQYNK93q4,2821
|
|
@@ -9,7 +10,7 @@ oagi/actor/base.py,sha256=UUZdS1tufZDIbtQHLs-es6ptGHQpa-EPWf7RSbVpQnU,7921
|
|
|
9
10
|
oagi/actor/short.py,sha256=wKLCxvf7Ys6rYxXpHe4zbZdbf_1q1qcmm5WyWubwj3E,2630
|
|
10
11
|
oagi/actor/sync.py,sha256=QTY1WNTI75jwkWBghdVViHIp5rYkbm3kumlLedU8YeQ,3588
|
|
11
12
|
oagi/agent/__init__.py,sha256=KTVLUMhbjgpTJoOWMUZkkiqwhgumvbOZV2tJ9XCLfao,901
|
|
12
|
-
oagi/agent/default.py,sha256=
|
|
13
|
+
oagi/agent/default.py,sha256=Ax05kBa8Fb14Ev8wd9LS6xkF65grf6qdxxTlgpkkDuk,4715
|
|
13
14
|
oagi/agent/factories.py,sha256=syi_EOlU4SUjo-0CKaML8eIPu3ToUEKua2VHp9lvNF0,5839
|
|
14
15
|
oagi/agent/protocol.py,sha256=IQJGiMN4yZIacrh5e9JQsoM9TyHb8wJRQR4LAk8dSA0,1615
|
|
15
16
|
oagi/agent/registry.py,sha256=7bMA2-pH3xQ9ZavrHB_mnc2fOGSMeICPbOGtHoM7It0,4851
|
|
@@ -23,28 +24,33 @@ oagi/agent/tasker/__init__.py,sha256=1iTEFe7lzcqh96TL9R0QADPpLJLrUP0shtZ4DlZSv_8
|
|
|
23
24
|
oagi/agent/tasker/memory.py,sha256=NR13l5yxRA8GUE-oupAP4W1n80ZNG0SxpUfxsNltkUY,5033
|
|
24
25
|
oagi/agent/tasker/models.py,sha256=sMQgwIMKhT1tvVF2yoc1hh8GwEiJ6i6qPMy9WoiA8JM,2137
|
|
25
26
|
oagi/agent/tasker/planner.py,sha256=q6IvH6sfU2kYX1NcC9VHjGaQ0X9jF18yjuAYXisNCg0,15489
|
|
26
|
-
oagi/agent/tasker/taskee_agent.py,sha256=
|
|
27
|
-
oagi/agent/tasker/tasker_agent.py,sha256=
|
|
27
|
+
oagi/agent/tasker/taskee_agent.py,sha256=OugYJbTbFKxgNjbIyQBBH4Zm5u5PuWN1F6R81_eIro8,18090
|
|
28
|
+
oagi/agent/tasker/tasker_agent.py,sha256=yb0BdQzJyAPpK3njHPWgQruV8zpUGBXn1WjOGEMIO-g,11291
|
|
28
29
|
oagi/cli/__init__.py,sha256=aDnJViTseShpo5fdGPTj-ELysZhmdvB6Z8mEj2D-_N4,359
|
|
29
|
-
oagi/cli/agent.py,sha256=
|
|
30
|
+
oagi/cli/agent.py,sha256=lT95jgfzf-LizOWbzYfhq_EkcYcLvJH2uL-HKnYW_es,11533
|
|
30
31
|
oagi/cli/display.py,sha256=Y8_Dn5RIEfRqZUHVGF6URItW0C3XC7bPLWoAmmhvBS0,1829
|
|
31
32
|
oagi/cli/main.py,sha256=faHns0HaQCGyylDn2YZLpjQESuEiMYjoQVoMkt8FsH4,2292
|
|
32
33
|
oagi/cli/server.py,sha256=JFpzCOeaftITxesz8Ya-_Efs03bgotBg7aYwmMZhPwU,3033
|
|
33
34
|
oagi/cli/tracking.py,sha256=TdrAcNq_-OjgXltFCoFc8NsO_k6yHbdzHnMn3vAAvKA,1707
|
|
34
35
|
oagi/cli/utils.py,sha256=zIkTrr-ai__3cGSaxiXY-OJs69Fcxd1sHb2FoeyHFtE,3034
|
|
35
36
|
oagi/client/__init__.py,sha256=F9DShPUdb6vZYmN1fpM1VYzp4MWqUao_e_R1KYmM4Q4,410
|
|
36
|
-
oagi/client/async_.py,sha256=
|
|
37
|
-
oagi/client/base.py,sha256=
|
|
38
|
-
oagi/client/sync.py,sha256
|
|
39
|
-
oagi/handler/__init__.py,sha256=
|
|
37
|
+
oagi/client/async_.py,sha256=BANE0KU14WBuXp6suBhr8JSlpWhN5SR2aJJ7wAJBDLQ,9574
|
|
38
|
+
oagi/client/base.py,sha256=CWAvE0AcpL8HD_i00n7Fq53AIAQGhBhS_n6LifUCqxE,14736
|
|
39
|
+
oagi/client/sync.py,sha256=4xNqqNihXmgLU385h22mMJ9wmmlw-jeOdWI4fmpEpTk,9369
|
|
40
|
+
oagi/handler/__init__.py,sha256=rqxbj6Bp9MGndLBVhqRx8b-GSYR3PC5ktErMvd-HzRI,2397
|
|
40
41
|
oagi/handler/_macos.py,sha256=Gs8GrhA_WAyv9Yw0D41duliP32Xk6vouyMeWjWJJT90,5187
|
|
41
42
|
oagi/handler/_windows.py,sha256=MSgPDYEOetSjbn9eJDSrdzBVlUGgGsTlegaTDc4C4Ss,2828
|
|
43
|
+
oagi/handler/_ydotool.py,sha256=WjvE6RGRm8j3SEWpgfMw31aow3z3qkiMupuUHYt-QAM,2948
|
|
42
44
|
oagi/handler/async_pyautogui_action_handler.py,sha256=wfNRBBURZnwQkNTcs9OPMmFJIAPtnXmcqxWbjda_q7I,1863
|
|
43
45
|
oagi/handler/async_screenshot_maker.py,sha256=8QCtUV59ozpOpvkqhUMb8QDI2qje2gsoFT1qB60tfJM,1689
|
|
46
|
+
oagi/handler/async_ydotool_action_handler.py,sha256=BRGqZB2u1k7R1acUX9k0TfdrmWS2eh3opc8LoqnlwJ4,1848
|
|
44
47
|
oagi/handler/capslock_manager.py,sha256=40LzWt1_1wbncF5koUTdbd9V3eo5Ex_mEWwjtEmHAf4,1878
|
|
45
|
-
oagi/handler/pil_image.py,sha256=
|
|
48
|
+
oagi/handler/pil_image.py,sha256=GQw2o8ORQinrM3AxhgNBbLhrkZajOL8YagU7UF-kkes,4357
|
|
46
49
|
oagi/handler/pyautogui_action_handler.py,sha256=BVmpKuYAMINJ5Ue_PK_WxFScAqLeyXC64g4NWQUtG_M,10146
|
|
47
50
|
oagi/handler/screenshot_maker.py,sha256=j1jTW-awx3vAnb1N5_FIMBC0Z-rNVQbiBP-S6Gh5dlE,1284
|
|
51
|
+
oagi/handler/utils.py,sha256=jj10z-v4_LUuVb8aClyXkUfZVEaqsWgi3be4t3Gw7oI,697
|
|
52
|
+
oagi/handler/wayland_support.py,sha256=jeQDqpwAxxREaGAYePQuK14nuEMPGmMEvMz2ymS-rT4,7727
|
|
53
|
+
oagi/handler/ydotool_action_handler.py,sha256=8cmOFaEsYMI5BiYdYRuUIaSXDNgrdk-B3OnHVbEa9Sk,8608
|
|
48
54
|
oagi/server/__init__.py,sha256=uZx8u3vJUb87kkNzwmmVrgAgbqRu0WxyMIQCLSx56kk,452
|
|
49
55
|
oagi/server/agent_wrappers.py,sha256=j8va0A7u80bzOM82nndAplK1uaO_T3kufHWScK6kfWM,3263
|
|
50
56
|
oagi/server/config.py,sha256=AJ1PLKuxrc6pRuur1hm5DwG2g2otxPwOCfKgzIACkSk,1691
|
|
@@ -69,8 +75,8 @@ oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
|
|
|
69
75
|
oagi/utils/__init__.py,sha256=vHXyX66hEsf33OJJkmZSUjaTYU0UngfbtjcZgxfOj3A,441
|
|
70
76
|
oagi/utils/output_parser.py,sha256=U7vzmoD8pyzDg23z3vy-L9a_jKPsAlr3x8lIdPszrY8,5322
|
|
71
77
|
oagi/utils/prompt_builder.py,sha256=_Q1HY82YUrq3jSCTZ3Rszu3qmI3Wn_fmq8hf14NuwQM,2180
|
|
72
|
-
oagi_core-0.
|
|
73
|
-
oagi_core-0.
|
|
74
|
-
oagi_core-0.
|
|
75
|
-
oagi_core-0.
|
|
76
|
-
oagi_core-0.
|
|
78
|
+
oagi_core-0.13.0.dist-info/METADATA,sha256=ZndadhpFZ4R5JEvm6O3jA_7zc8c2K0evAJRN_zaGa1I,14203
|
|
79
|
+
oagi_core-0.13.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
80
|
+
oagi_core-0.13.0.dist-info/entry_points.txt,sha256=zzgsOSWX6aN3KUB0Z1it8DMxFFBJBqmZVqMVAJRjYuw,44
|
|
81
|
+
oagi_core-0.13.0.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
|
|
82
|
+
oagi_core-0.13.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|