oagi-core 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/agent/default.py +14 -4
- oagi/agent/factories.py +98 -16
- oagi/agent/tasker/planner.py +19 -8
- oagi/agent/tasker/taskee_agent.py +31 -9
- oagi/agent/tasker/tasker_agent.py +16 -5
- oagi/cli/agent.py +70 -31
- oagi/cli/display.py +2 -1
- oagi/cli/server.py +1 -1
- oagi/cli/utils.py +4 -3
- oagi/client/async_.py +19 -6
- oagi/client/base.py +14 -16
- oagi/client/sync.py +19 -6
- oagi/constants.py +43 -0
- oagi/handler/__init__.py +16 -0
- oagi/handler/_macos.py +137 -0
- oagi/handler/_windows.py +101 -0
- oagi/handler/async_pyautogui_action_handler.py +8 -0
- oagi/handler/capslock_manager.py +55 -0
- oagi/handler/pyautogui_action_handler.py +23 -40
- oagi/server/config.py +6 -3
- oagi/server/models.py +5 -3
- oagi/server/session_store.py +8 -6
- oagi/server/socketio_server.py +6 -5
- oagi/task/async_.py +4 -3
- oagi/task/async_short.py +3 -2
- oagi/task/base.py +2 -1
- oagi/task/short.py +3 -2
- oagi/task/sync.py +4 -3
- oagi/types/__init__.py +2 -1
- oagi/types/url.py +25 -0
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/METADATA +34 -1
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/RECORD +35 -32
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/licenses/LICENSE +0 -0
oagi/client/base.py
CHANGED
|
@@ -11,6 +11,7 @@ from typing import Any, Generic, TypeVar
|
|
|
11
11
|
|
|
12
12
|
import httpx
|
|
13
13
|
|
|
14
|
+
from ..constants import API_KEY_HELP_URL, DEFAULT_BASE_URL, HTTP_CLIENT_TIMEOUT
|
|
14
15
|
from ..exceptions import (
|
|
15
16
|
APIError,
|
|
16
17
|
AuthenticationError,
|
|
@@ -41,20 +42,19 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
41
42
|
|
|
42
43
|
def __init__(self, base_url: str | None = None, api_key: str | None = None):
|
|
43
44
|
# Get from environment if not provided
|
|
44
|
-
self.base_url = (
|
|
45
|
-
base_url or os.getenv("OAGI_BASE_URL") or "https://api.agiopen.org"
|
|
46
|
-
)
|
|
45
|
+
self.base_url = base_url or os.getenv("OAGI_BASE_URL") or DEFAULT_BASE_URL
|
|
47
46
|
self.api_key = api_key or os.getenv("OAGI_API_KEY")
|
|
48
47
|
|
|
49
48
|
# Validate required configuration
|
|
50
49
|
if not self.api_key:
|
|
51
50
|
raise ConfigurationError(
|
|
52
51
|
"OAGI API key must be provided either as 'api_key' parameter or "
|
|
53
|
-
"OAGI_API_KEY environment variable"
|
|
52
|
+
"OAGI_API_KEY environment variable. "
|
|
53
|
+
f"Get your API key at {API_KEY_HELP_URL}"
|
|
54
54
|
)
|
|
55
55
|
|
|
56
56
|
self.base_url = self.base_url.rstrip("/")
|
|
57
|
-
self.timeout =
|
|
57
|
+
self.timeout = HTTP_CLIENT_TIMEOUT
|
|
58
58
|
self.client: HttpClientT # Will be set by subclasses
|
|
59
59
|
|
|
60
60
|
logger.info(f"Client initialized with base_url: {self.base_url}")
|
|
@@ -273,22 +273,20 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
273
273
|
NetworkError: If network error occurs
|
|
274
274
|
APIError: If API returns error or invalid response
|
|
275
275
|
"""
|
|
276
|
+
response_data = self._parse_response_json(response)
|
|
277
|
+
|
|
278
|
+
# Check for error status codes first (follows _process_response pattern)
|
|
279
|
+
if response.status_code != 200:
|
|
280
|
+
self._handle_response_error(response, response_data)
|
|
281
|
+
|
|
276
282
|
try:
|
|
277
|
-
response_data = response.json()
|
|
278
283
|
upload_file_response = UploadFileResponse(**response_data)
|
|
279
284
|
logger.debug("Calling /v1/file/upload successful")
|
|
280
285
|
return upload_file_response
|
|
281
|
-
except
|
|
282
|
-
logger.error(f"
|
|
283
|
-
raise APIError(
|
|
284
|
-
f"Invalid response format (status {response.status_code})",
|
|
285
|
-
status_code=response.status_code,
|
|
286
|
-
response=response,
|
|
287
|
-
)
|
|
288
|
-
except KeyError as e:
|
|
289
|
-
logger.error(f"Invalid response: {response.status_code}")
|
|
286
|
+
except Exception as e:
|
|
287
|
+
logger.error(f"Invalid upload response: {response.status_code}")
|
|
290
288
|
raise APIError(
|
|
291
|
-
f"Invalid presigned S3 URL response:
|
|
289
|
+
f"Invalid presigned S3 URL response: {e}",
|
|
292
290
|
status_code=response.status_code,
|
|
293
291
|
response=response,
|
|
294
292
|
)
|
oagi/client/sync.py
CHANGED
|
@@ -11,6 +11,13 @@ from functools import wraps
|
|
|
11
11
|
import httpx
|
|
12
12
|
from httpx import Response
|
|
13
13
|
|
|
14
|
+
from ..constants import (
|
|
15
|
+
API_HEALTH_ENDPOINT,
|
|
16
|
+
API_V1_FILE_UPLOAD_ENDPOINT,
|
|
17
|
+
API_V1_GENERATE_ENDPOINT,
|
|
18
|
+
API_V2_MESSAGE_ENDPOINT,
|
|
19
|
+
HTTP_CLIENT_TIMEOUT,
|
|
20
|
+
)
|
|
14
21
|
from ..logging import get_logger
|
|
15
22
|
from ..types import Image
|
|
16
23
|
from ..types.models import GenerateResponse, LLMResponse, UploadFileResponse
|
|
@@ -46,7 +53,7 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
46
53
|
def __init__(self, base_url: str | None = None, api_key: str | None = None):
|
|
47
54
|
super().__init__(base_url, api_key)
|
|
48
55
|
self.client = httpx.Client(base_url=self.base_url)
|
|
49
|
-
self.upload_client = httpx.Client(timeout=
|
|
56
|
+
self.upload_client = httpx.Client(timeout=HTTP_CLIENT_TIMEOUT)
|
|
50
57
|
logger.info(f"SyncClient initialized with base_url: {self.base_url}")
|
|
51
58
|
|
|
52
59
|
def __enter__(self):
|
|
@@ -124,7 +131,10 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
124
131
|
# Make request
|
|
125
132
|
try:
|
|
126
133
|
response = self.client.post(
|
|
127
|
-
|
|
134
|
+
API_V2_MESSAGE_ENDPOINT,
|
|
135
|
+
json=payload,
|
|
136
|
+
headers=headers,
|
|
137
|
+
timeout=self.timeout,
|
|
128
138
|
)
|
|
129
139
|
return self._process_response(response)
|
|
130
140
|
except (httpx.TimeoutException, httpx.NetworkError) as e:
|
|
@@ -139,7 +149,7 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
139
149
|
"""
|
|
140
150
|
logger.debug("Making health check request")
|
|
141
151
|
try:
|
|
142
|
-
response = self.client.get(
|
|
152
|
+
response = self.client.get(API_HEALTH_ENDPOINT)
|
|
143
153
|
response.raise_for_status()
|
|
144
154
|
result = response.json()
|
|
145
155
|
logger.debug("Health check successful")
|
|
@@ -161,12 +171,12 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
161
171
|
Returns:
|
|
162
172
|
UploadFileResponse: The response from /v1/file/upload with uuid and presigned S3 URL
|
|
163
173
|
"""
|
|
164
|
-
logger.debug("Making API request to
|
|
174
|
+
logger.debug(f"Making API request to {API_V1_FILE_UPLOAD_ENDPOINT}")
|
|
165
175
|
|
|
166
176
|
try:
|
|
167
177
|
headers = self._build_headers(api_version)
|
|
168
178
|
response = self.client.get(
|
|
169
|
-
|
|
179
|
+
API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
|
|
170
180
|
)
|
|
171
181
|
return self._process_upload_response(response)
|
|
172
182
|
except (httpx.TimeoutException, httpx.NetworkError, httpx.HTTPStatusError) as e:
|
|
@@ -286,7 +296,10 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
286
296
|
# Make request
|
|
287
297
|
try:
|
|
288
298
|
response = self.client.post(
|
|
289
|
-
|
|
299
|
+
API_V1_GENERATE_ENDPOINT,
|
|
300
|
+
json=payload,
|
|
301
|
+
headers=headers,
|
|
302
|
+
timeout=self.timeout,
|
|
290
303
|
)
|
|
291
304
|
return self._process_generate_response(response)
|
|
292
305
|
except (httpx.TimeoutException, httpx.NetworkError) as e:
|
oagi/constants.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
# URLs & API Endpoints
|
|
10
|
+
DEFAULT_BASE_URL = "https://api.agiopen.org"
|
|
11
|
+
API_KEY_HELP_URL = "https://developer.agiopen.org/api-keys"
|
|
12
|
+
API_V2_MESSAGE_ENDPOINT = "/v2/message"
|
|
13
|
+
API_V1_FILE_UPLOAD_ENDPOINT = "/v1/file/upload"
|
|
14
|
+
API_V1_GENERATE_ENDPOINT = "/v1/generate"
|
|
15
|
+
API_HEALTH_ENDPOINT = "/health"
|
|
16
|
+
|
|
17
|
+
# Model identifiers
|
|
18
|
+
MODEL_ACTOR = "lux-actor-1"
|
|
19
|
+
MODEL_THINKER = "lux-thinker-1"
|
|
20
|
+
|
|
21
|
+
# Agent modes
|
|
22
|
+
MODE_ACTOR = "actor"
|
|
23
|
+
MODE_THINKER = "thinker"
|
|
24
|
+
MODE_TASKER = "tasker"
|
|
25
|
+
|
|
26
|
+
# Default max steps per model
|
|
27
|
+
DEFAULT_MAX_STEPS = 20
|
|
28
|
+
DEFAULT_MAX_STEPS_THINKER = 100
|
|
29
|
+
DEFAULT_MAX_STEPS_TASKER = 60
|
|
30
|
+
|
|
31
|
+
# Reflection intervals
|
|
32
|
+
DEFAULT_REFLECTION_INTERVAL = 4
|
|
33
|
+
DEFAULT_REFLECTION_INTERVAL_TASKER = 20
|
|
34
|
+
|
|
35
|
+
# Timing & Delays
|
|
36
|
+
DEFAULT_STEP_DELAY = 0.3
|
|
37
|
+
|
|
38
|
+
# Temperature Defaults
|
|
39
|
+
DEFAULT_TEMPERATURE = 0.5
|
|
40
|
+
DEFAULT_TEMPERATURE_LOW = 0.1
|
|
41
|
+
|
|
42
|
+
# Timeout Values
|
|
43
|
+
HTTP_CLIENT_TIMEOUT = 60
|
oagi/handler/__init__.py
CHANGED
|
@@ -14,6 +14,21 @@ from oagi.handler.pyautogui_action_handler import (
|
|
|
14
14
|
)
|
|
15
15
|
from oagi.handler.screenshot_maker import ScreenshotMaker
|
|
16
16
|
|
|
17
|
+
|
|
18
|
+
def reset_handler(handler) -> None:
|
|
19
|
+
"""Reset handler state if supported.
|
|
20
|
+
|
|
21
|
+
Uses duck-typing to check if the handler has a reset() method.
|
|
22
|
+
This allows handlers to reset their internal state (e.g., capslock state)
|
|
23
|
+
at the start of a new automation task.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
handler: The action handler to reset
|
|
27
|
+
"""
|
|
28
|
+
if hasattr(handler, "reset"):
|
|
29
|
+
handler.reset()
|
|
30
|
+
|
|
31
|
+
|
|
17
32
|
__all__ = [
|
|
18
33
|
"PILImage",
|
|
19
34
|
"PyautoguiActionHandler",
|
|
@@ -21,4 +36,5 @@ __all__ = [
|
|
|
21
36
|
"AsyncPyautoguiActionHandler",
|
|
22
37
|
"ScreenshotMaker",
|
|
23
38
|
"AsyncScreenshotMaker",
|
|
39
|
+
"reset_handler",
|
|
24
40
|
]
|
oagi/handler/_macos.py
CHANGED
|
@@ -6,6 +6,15 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
+
"""macOS-specific keyboard and mouse input handling.
|
|
10
|
+
|
|
11
|
+
This module provides:
|
|
12
|
+
- macos_click(): Fix for PyAutoGUI multi-click bug on macOS
|
|
13
|
+
- typewrite_exact(): Type text exactly, ignoring system capslock state
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import time
|
|
17
|
+
|
|
9
18
|
import pyautogui
|
|
10
19
|
|
|
11
20
|
from ..exceptions import check_optional_dependency
|
|
@@ -13,6 +22,134 @@ from ..exceptions import check_optional_dependency
|
|
|
13
22
|
check_optional_dependency("Quartz", "macOS multiple clicks", "desktop")
|
|
14
23
|
import Quartz # noqa: E402
|
|
15
24
|
|
|
25
|
+
# macOS virtual key codes for typeable characters
|
|
26
|
+
KEYCODE_MAP = {
|
|
27
|
+
"a": 0x00,
|
|
28
|
+
"b": 0x0B,
|
|
29
|
+
"c": 0x08,
|
|
30
|
+
"d": 0x02,
|
|
31
|
+
"e": 0x0E,
|
|
32
|
+
"f": 0x03,
|
|
33
|
+
"g": 0x05,
|
|
34
|
+
"h": 0x04,
|
|
35
|
+
"i": 0x22,
|
|
36
|
+
"j": 0x26,
|
|
37
|
+
"k": 0x28,
|
|
38
|
+
"l": 0x25,
|
|
39
|
+
"m": 0x2E,
|
|
40
|
+
"n": 0x2D,
|
|
41
|
+
"o": 0x1F,
|
|
42
|
+
"p": 0x23,
|
|
43
|
+
"q": 0x0C,
|
|
44
|
+
"r": 0x0F,
|
|
45
|
+
"s": 0x01,
|
|
46
|
+
"t": 0x11,
|
|
47
|
+
"u": 0x20,
|
|
48
|
+
"v": 0x09,
|
|
49
|
+
"w": 0x0D,
|
|
50
|
+
"x": 0x07,
|
|
51
|
+
"y": 0x10,
|
|
52
|
+
"z": 0x06,
|
|
53
|
+
"1": 0x12,
|
|
54
|
+
"2": 0x13,
|
|
55
|
+
"3": 0x14,
|
|
56
|
+
"4": 0x15,
|
|
57
|
+
"5": 0x17,
|
|
58
|
+
"6": 0x16,
|
|
59
|
+
"7": 0x1A,
|
|
60
|
+
"8": 0x1C,
|
|
61
|
+
"9": 0x19,
|
|
62
|
+
"0": 0x1D,
|
|
63
|
+
" ": 0x31, # space
|
|
64
|
+
"-": 0x1B,
|
|
65
|
+
"=": 0x18,
|
|
66
|
+
"[": 0x21,
|
|
67
|
+
"]": 0x1E,
|
|
68
|
+
"\\": 0x2A,
|
|
69
|
+
";": 0x29,
|
|
70
|
+
"'": 0x27,
|
|
71
|
+
"`": 0x32,
|
|
72
|
+
",": 0x2B,
|
|
73
|
+
".": 0x2F,
|
|
74
|
+
"/": 0x2C,
|
|
75
|
+
"\t": 0x30, # tab
|
|
76
|
+
"\n": 0x24, # return
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
# Characters that require shift key (on US keyboard layout)
|
|
80
|
+
SHIFT_CHARS = set('~!@#$%^&*()_+{}|:"<>?ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
|
81
|
+
|
|
82
|
+
# Mapping of shifted characters to their base key
|
|
83
|
+
SHIFT_KEY_MAP = {
|
|
84
|
+
"~": "`",
|
|
85
|
+
"!": "1",
|
|
86
|
+
"@": "2",
|
|
87
|
+
"#": "3",
|
|
88
|
+
"$": "4",
|
|
89
|
+
"%": "5",
|
|
90
|
+
"^": "6",
|
|
91
|
+
"&": "7",
|
|
92
|
+
"*": "8",
|
|
93
|
+
"(": "9",
|
|
94
|
+
")": "0",
|
|
95
|
+
"_": "-",
|
|
96
|
+
"+": "=",
|
|
97
|
+
"{": "[",
|
|
98
|
+
"}": "]",
|
|
99
|
+
"|": "\\",
|
|
100
|
+
":": ";",
|
|
101
|
+
'"': "'",
|
|
102
|
+
"<": ",",
|
|
103
|
+
">": ".",
|
|
104
|
+
"?": "/",
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def typewrite_exact(text: str, interval: float = 0.01) -> None:
|
|
109
|
+
"""Type text exactly as specified, ignoring system capslock state.
|
|
110
|
+
|
|
111
|
+
This function uses Quartz CGEventCreateKeyboardEvent with explicit
|
|
112
|
+
flag control via CGEventSetFlags() to type each character with the
|
|
113
|
+
correct case, regardless of the system's capslock state.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
text: The text to type exactly as specified
|
|
117
|
+
interval: Time in seconds between each character (default: 0.01)
|
|
118
|
+
"""
|
|
119
|
+
for char in text:
|
|
120
|
+
# Determine if this character needs shift
|
|
121
|
+
needs_shift = char in SHIFT_CHARS
|
|
122
|
+
|
|
123
|
+
# Get the base key (for shifted chars, look up the unshifted version)
|
|
124
|
+
if char.isupper():
|
|
125
|
+
base_char = char.lower()
|
|
126
|
+
elif char in SHIFT_KEY_MAP:
|
|
127
|
+
base_char = SHIFT_KEY_MAP[char]
|
|
128
|
+
else:
|
|
129
|
+
base_char = char
|
|
130
|
+
|
|
131
|
+
# Get keycode for the base character
|
|
132
|
+
keycode = KEYCODE_MAP.get(base_char)
|
|
133
|
+
if keycode is None:
|
|
134
|
+
# Character not in our keycode map, skip it
|
|
135
|
+
continue
|
|
136
|
+
|
|
137
|
+
# Set flags: shift if needed, otherwise clear all flags
|
|
138
|
+
flags = Quartz.kCGEventFlagMaskShift if needs_shift else 0
|
|
139
|
+
|
|
140
|
+
# Key down
|
|
141
|
+
event_down = Quartz.CGEventCreateKeyboardEvent(None, keycode, True)
|
|
142
|
+
Quartz.CGEventSetFlags(event_down, flags)
|
|
143
|
+
Quartz.CGEventPost(Quartz.kCGHIDEventTap, event_down)
|
|
144
|
+
|
|
145
|
+
# Key up
|
|
146
|
+
event_up = Quartz.CGEventCreateKeyboardEvent(None, keycode, False)
|
|
147
|
+
Quartz.CGEventSetFlags(event_up, flags)
|
|
148
|
+
Quartz.CGEventPost(Quartz.kCGHIDEventTap, event_up)
|
|
149
|
+
|
|
150
|
+
if interval > 0:
|
|
151
|
+
time.sleep(interval)
|
|
152
|
+
|
|
16
153
|
|
|
17
154
|
def macos_click(x: int, y: int, clicks: int = 1) -> None:
|
|
18
155
|
"""
|
oagi/handler/_windows.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
"""Windows-specific keyboard input handling.
|
|
10
|
+
|
|
11
|
+
This module provides typewrite_exact() which types text exactly as specified,
|
|
12
|
+
ignoring the system's capslock state by using SendInput with KEYEVENTF_UNICODE.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import ctypes
|
|
16
|
+
import time
|
|
17
|
+
from ctypes import wintypes
|
|
18
|
+
|
|
19
|
+
INPUT_KEYBOARD = 1
|
|
20
|
+
KEYEVENTF_UNICODE = 0x0004
|
|
21
|
+
KEYEVENTF_KEYUP = 0x0002
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class KEYBDINPUT(ctypes.Structure):
|
|
25
|
+
_fields_ = [
|
|
26
|
+
("wVk", wintypes.WORD),
|
|
27
|
+
("wScan", wintypes.WORD),
|
|
28
|
+
("dwFlags", wintypes.DWORD),
|
|
29
|
+
("time", wintypes.DWORD),
|
|
30
|
+
("dwExtraInfo", ctypes.POINTER(ctypes.c_ulong)),
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class MOUSEINPUT(ctypes.Structure):
|
|
35
|
+
_fields_ = [
|
|
36
|
+
("dx", ctypes.c_long),
|
|
37
|
+
("dy", ctypes.c_long),
|
|
38
|
+
("mouseData", wintypes.DWORD),
|
|
39
|
+
("dwFlags", wintypes.DWORD),
|
|
40
|
+
("time", wintypes.DWORD),
|
|
41
|
+
("dwExtraInfo", ctypes.POINTER(ctypes.c_ulong)),
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class HARDWAREINPUT(ctypes.Structure):
|
|
46
|
+
_fields_ = [
|
|
47
|
+
("uMsg", wintypes.DWORD),
|
|
48
|
+
("wParamL", wintypes.WORD),
|
|
49
|
+
("wParamH", wintypes.WORD),
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class INPUT(ctypes.Structure):
|
|
54
|
+
class _I(ctypes.Union):
|
|
55
|
+
_fields_ = [
|
|
56
|
+
("ki", KEYBDINPUT),
|
|
57
|
+
("mi", MOUSEINPUT),
|
|
58
|
+
("hi", HARDWAREINPUT),
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
_anonymous_ = ("i",)
|
|
62
|
+
_fields_ = [
|
|
63
|
+
("type", wintypes.DWORD),
|
|
64
|
+
("i", _I),
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Configure SendInput with proper argtypes for 64-bit compatibility
|
|
69
|
+
SendInput = ctypes.windll.user32.SendInput
|
|
70
|
+
SendInput.argtypes = [wintypes.UINT, ctypes.POINTER(INPUT), ctypes.c_int]
|
|
71
|
+
SendInput.restype = wintypes.UINT
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def typewrite_exact(text: str, interval: float = 0.01) -> None:
|
|
75
|
+
"""Type text exactly using Unicode input - ignores capslock, keyboard layout, etc.
|
|
76
|
+
|
|
77
|
+
This function uses SendInput with KEYEVENTF_UNICODE to send characters
|
|
78
|
+
directly by their Unicode codepoint, completely bypassing keyboard state
|
|
79
|
+
(capslock, layout, etc.).
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
text: The text to type exactly as specified
|
|
83
|
+
interval: Time in seconds between each character (default: 0.01)
|
|
84
|
+
"""
|
|
85
|
+
for char in text:
|
|
86
|
+
inputs = (INPUT * 2)()
|
|
87
|
+
|
|
88
|
+
# Key down
|
|
89
|
+
inputs[0].type = INPUT_KEYBOARD
|
|
90
|
+
inputs[0].ki.wScan = ord(char)
|
|
91
|
+
inputs[0].ki.dwFlags = KEYEVENTF_UNICODE
|
|
92
|
+
|
|
93
|
+
# Key up
|
|
94
|
+
inputs[1].type = INPUT_KEYBOARD
|
|
95
|
+
inputs[1].ki.wScan = ord(char)
|
|
96
|
+
inputs[1].ki.dwFlags = KEYEVENTF_UNICODE | KEYEVENTF_KEYUP
|
|
97
|
+
|
|
98
|
+
SendInput(2, inputs, ctypes.sizeof(INPUT))
|
|
99
|
+
|
|
100
|
+
if interval > 0:
|
|
101
|
+
time.sleep(interval)
|
oagi/handler/async_pyautogui_action_handler.py
CHANGED
|
@@ -29,6 +29,14 @@ class AsyncPyautoguiActionHandler:
|
|
|
29
29
|
self.sync_handler = PyautoguiActionHandler(config=config)
|
|
30
30
|
self.config = config or PyautoguiConfig()
|
|
31
31
|
|
|
32
|
+
def reset(self):
|
|
33
|
+
"""Reset handler state.
|
|
34
|
+
|
|
35
|
+
Delegates to the underlying synchronous handler's reset method.
|
|
36
|
+
Called at automation start/end and when FINISH action is received.
|
|
37
|
+
"""
|
|
38
|
+
self.sync_handler.reset()
|
|
39
|
+
|
|
32
40
|
async def __call__(self, actions: list[Action]) -> None:
|
|
33
41
|
"""
|
|
34
42
|
Execute actions asynchronously using a thread pool executor.
|
oagi/handler/capslock_manager.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CapsLockManager:
|
|
11
|
+
"""Manages caps lock state for text transformation.
|
|
12
|
+
|
|
13
|
+
This class maintains an internal caps lock state that can be toggled
|
|
14
|
+
independently of the system's caps lock state. This allows for consistent
|
|
15
|
+
text case handling during automation regardless of the system state.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, mode: str = "session"):
|
|
19
|
+
"""Initialize caps lock manager.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
mode: Either "session" (internal state) or "system" (OS-level)
|
|
23
|
+
"""
|
|
24
|
+
self.mode = mode
|
|
25
|
+
self.caps_enabled = False
|
|
26
|
+
|
|
27
|
+
def reset(self):
|
|
28
|
+
"""Reset caps lock state to default (off).
|
|
29
|
+
|
|
30
|
+
Called at automation start/end and when FINISH action is received.
|
|
31
|
+
"""
|
|
32
|
+
self.caps_enabled = False
|
|
33
|
+
|
|
34
|
+
def toggle(self):
|
|
35
|
+
"""Toggle caps lock state in session mode."""
|
|
36
|
+
if self.mode == "session":
|
|
37
|
+
self.caps_enabled = not self.caps_enabled
|
|
38
|
+
|
|
39
|
+
def transform_text(self, text: str) -> str:
|
|
40
|
+
"""Transform text based on caps lock state.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
text: Input text to transform
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
Transformed text (uppercase alphabets if caps enabled in session mode)
|
|
47
|
+
"""
|
|
48
|
+
if self.mode == "session" and self.caps_enabled:
|
|
49
|
+
# Transform letters to uppercase, preserve special characters
|
|
50
|
+
return "".join(c.upper() if c.isalpha() else c for c in text)
|
|
51
|
+
return text
|
|
52
|
+
|
|
53
|
+
def should_use_system_capslock(self) -> bool:
|
|
54
|
+
"""Check if system-level caps lock should be used."""
|
|
55
|
+
return self.mode == "system"
|
oagi/handler/pyautogui_action_handler.py
CHANGED
|
@@ -13,48 +13,15 @@ from pydantic import BaseModel, Field
|
|
|
13
13
|
|
|
14
14
|
from ..exceptions import check_optional_dependency
|
|
15
15
|
from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
|
|
16
|
+
from .capslock_manager import CapsLockManager
|
|
16
17
|
|
|
17
18
|
check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
|
|
18
19
|
import pyautogui # noqa: E402
|
|
19
20
|
|
|
20
21
|
if sys.platform == "darwin":
|
|
21
22
|
from . import _macos
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class CapsLockManager:
|
|
25
|
-
"""Manages caps lock state for text transformation."""
|
|
26
|
-
|
|
27
|
-
def __init__(self, mode: str = "session"):
|
|
28
|
-
"""Initialize caps lock manager.
|
|
29
|
-
|
|
30
|
-
Args:
|
|
31
|
-
mode: Either "session" (internal state) or "system" (OS-level)
|
|
32
|
-
"""
|
|
33
|
-
self.mode = mode
|
|
34
|
-
self.caps_enabled = False
|
|
35
|
-
|
|
36
|
-
def toggle(self):
|
|
37
|
-
"""Toggle caps lock state in session mode."""
|
|
38
|
-
if self.mode == "session":
|
|
39
|
-
self.caps_enabled = not self.caps_enabled
|
|
40
|
-
|
|
41
|
-
def transform_text(self, text: str) -> str:
|
|
42
|
-
"""Transform text based on caps lock state.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
text: Input text to transform
|
|
46
|
-
|
|
47
|
-
Returns:
|
|
48
|
-
Transformed text (uppercase if caps enabled in session mode)
|
|
49
|
-
"""
|
|
50
|
-
if self.mode == "session" and self.caps_enabled:
|
|
51
|
-
# Transform letters to uppercase, preserve special characters
|
|
52
|
-
return "".join(c.upper() if c.isalpha() else c for c in text)
|
|
53
|
-
return text
|
|
54
|
-
|
|
55
|
-
def should_use_system_capslock(self) -> bool:
|
|
56
|
-
"""Check if system-level caps lock should be used."""
|
|
57
|
-
return self.mode == "system"
|
|
23
|
+
elif sys.platform == "win32":
|
|
24
|
+
from . import _windows
|
|
58
25
|
|
|
59
26
|
|
|
60
27
|
class PyautoguiConfig(BaseModel):
|
|
@@ -64,7 +31,8 @@ class PyautoguiConfig(BaseModel):
|
|
|
64
31
|
default=0.5, description="Duration for drag operations in seconds"
|
|
65
32
|
)
|
|
66
33
|
scroll_amount: int = Field(
|
|
67
|
-
default=
|
|
34
|
+
default=2 if sys.platform == "darwin" else 100,
|
|
35
|
+
description="Amount to scroll (positive for up, negative for down)",
|
|
68
36
|
)
|
|
69
37
|
wait_duration: float = Field(
|
|
70
38
|
default=1.0, description="Duration for wait actions in seconds"
|
|
@@ -110,6 +78,14 @@ class PyautoguiActionHandler:
|
|
|
110
78
|
# Initialize caps lock manager
|
|
111
79
|
self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
|
|
112
80
|
|
|
81
|
+
def reset(self):
|
|
82
|
+
"""Reset handler state.
|
|
83
|
+
|
|
84
|
+
Called at automation start/end and when FINISH action is received.
|
|
85
|
+
Resets the internal capslock state.
|
|
86
|
+
"""
|
|
87
|
+
self.caps_manager.reset()
|
|
88
|
+
|
|
113
89
|
def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
|
|
114
90
|
"""Convert coordinates from 0-1000 range to actual screen coordinates.
|
|
115
91
|
|
|
@@ -237,7 +213,14 @@ class PyautoguiActionHandler:
|
|
|
237
213
|
text = arg.strip("\"'")
|
|
238
214
|
# Apply caps lock transformation if needed
|
|
239
215
|
text = self.caps_manager.transform_text(text)
|
|
240
|
-
|
|
216
|
+
# Use platform-specific typing that ignores system capslock
|
|
217
|
+
if sys.platform == "darwin":
|
|
218
|
+
_macos.typewrite_exact(text)
|
|
219
|
+
elif sys.platform == "win32":
|
|
220
|
+
_windows.typewrite_exact(text)
|
|
221
|
+
else:
|
|
222
|
+
# Fallback for other platforms
|
|
223
|
+
pyautogui.typewrite(text)
|
|
241
224
|
|
|
242
225
|
case ActionType.SCROLL:
|
|
243
226
|
x, y, direction = self._parse_scroll(arg)
|
|
@@ -250,8 +233,8 @@ class PyautoguiActionHandler:
|
|
|
250
233
|
pyautogui.scroll(scroll_amount)
|
|
251
234
|
|
|
252
235
|
case ActionType.FINISH:
|
|
253
|
-
# Task completion -
|
|
254
|
-
|
|
236
|
+
# Task completion - reset handler state
|
|
237
|
+
self.reset()
|
|
255
238
|
|
|
256
239
|
case ActionType.WAIT:
|
|
257
240
|
# Wait for a short period
|
oagi/server/config.py
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
from pydantic import Field
|
|
10
10
|
|
|
11
|
+
from ..constants import DEFAULT_MAX_STEPS, MODEL_ACTOR
|
|
11
12
|
from ..exceptions import check_optional_dependency
|
|
12
13
|
|
|
13
14
|
check_optional_dependency("pydantic_settings", "Server features", "server")
|
|
@@ -20,7 +21,7 @@ class ServerConfig(BaseSettings):
|
|
|
20
21
|
oagi_base_url: str = Field(default="https://api.agiopen.org", alias="OAGI_BASE_URL")
|
|
21
22
|
|
|
22
23
|
# Server settings
|
|
23
|
-
server_host: str = Field(default="
|
|
24
|
+
server_host: str = Field(default="127.0.0.1", alias="OAGI_SERVER_HOST")
|
|
24
25
|
server_port: int = Field(default=8000, alias="OAGI_SERVER_PORT")
|
|
25
26
|
cors_allowed_origins: str = Field(default="*", alias="OAGI_CORS_ORIGINS")
|
|
26
27
|
|
|
@@ -28,11 +29,13 @@ class ServerConfig(BaseSettings):
|
|
|
28
29
|
session_timeout_seconds: float = Field(default=10.0)
|
|
29
30
|
|
|
30
31
|
# Model settings
|
|
31
|
-
default_model: str = Field(default=
|
|
32
|
+
default_model: str = Field(default=MODEL_ACTOR, alias="OAGI_DEFAULT_MODEL")
|
|
32
33
|
default_temperature: float = Field(default=0.5, ge=0.0, le=2.0)
|
|
33
34
|
|
|
34
35
|
# Agent settings
|
|
35
|
-
max_steps: int = Field(
|
|
36
|
+
max_steps: int = Field(
|
|
37
|
+
default=DEFAULT_MAX_STEPS, alias="OAGI_MAX_STEPS", ge=1, le=200
|
|
38
|
+
)
|
|
36
39
|
|
|
37
40
|
# Socket.IO settings
|
|
38
41
|
socketio_path: str = Field(default="/socket.io")
|
oagi/server/models.py
CHANGED
|
@@ -10,13 +10,15 @@ from typing import Literal
|
|
|
10
10
|
|
|
11
11
|
from pydantic import BaseModel, Field
|
|
12
12
|
|
|
13
|
+
from ..constants import DEFAULT_TEMPERATURE_LOW, MODE_ACTOR, MODEL_ACTOR
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
# Client-to-server events
|
|
15
17
|
class InitEventData(BaseModel):
|
|
16
18
|
instruction: str = Field(...)
|
|
17
|
-
mode: str | None = Field(default=
|
|
18
|
-
model: str | None = Field(default=
|
|
19
|
-
temperature: float | None = Field(default=
|
|
19
|
+
mode: str | None = Field(default=MODE_ACTOR)
|
|
20
|
+
model: str | None = Field(default=MODEL_ACTOR)
|
|
21
|
+
temperature: float | None = Field(default=DEFAULT_TEMPERATURE_LOW, ge=0.0, le=2.0)
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
# Server-to-client events
|