oagi-core 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oagi/client/base.py CHANGED
@@ -11,6 +11,7 @@ from typing import Any, Generic, TypeVar
11
11
 
12
12
  import httpx
13
13
 
14
+ from ..constants import API_KEY_HELP_URL, DEFAULT_BASE_URL, HTTP_CLIENT_TIMEOUT
14
15
  from ..exceptions import (
15
16
  APIError,
16
17
  AuthenticationError,
@@ -41,20 +42,19 @@ class BaseClient(Generic[HttpClientT]):
41
42
 
42
43
  def __init__(self, base_url: str | None = None, api_key: str | None = None):
43
44
  # Get from environment if not provided
44
- self.base_url = (
45
- base_url or os.getenv("OAGI_BASE_URL") or "https://api.agiopen.org"
46
- )
45
+ self.base_url = base_url or os.getenv("OAGI_BASE_URL") or DEFAULT_BASE_URL
47
46
  self.api_key = api_key or os.getenv("OAGI_API_KEY")
48
47
 
49
48
  # Validate required configuration
50
49
  if not self.api_key:
51
50
  raise ConfigurationError(
52
51
  "OAGI API key must be provided either as 'api_key' parameter or "
53
- "OAGI_API_KEY environment variable"
52
+ "OAGI_API_KEY environment variable. "
53
+ f"Get your API key at {API_KEY_HELP_URL}"
54
54
  )
55
55
 
56
56
  self.base_url = self.base_url.rstrip("/")
57
- self.timeout = 60
57
+ self.timeout = HTTP_CLIENT_TIMEOUT
58
58
  self.client: HttpClientT # Will be set by subclasses
59
59
 
60
60
  logger.info(f"Client initialized with base_url: {self.base_url}")
@@ -273,22 +273,20 @@ class BaseClient(Generic[HttpClientT]):
273
273
  NetworkError: If network error occurs
274
274
  APIError: If API returns error or invalid response
275
275
  """
276
+ response_data = self._parse_response_json(response)
277
+
278
+ # Check for error status codes first (follows _process_response pattern)
279
+ if response.status_code != 200:
280
+ self._handle_response_error(response, response_data)
281
+
276
282
  try:
277
- response_data = response.json()
278
283
  upload_file_response = UploadFileResponse(**response_data)
279
284
  logger.debug("Calling /v1/file/upload successful")
280
285
  return upload_file_response
281
- except ValueError:
282
- logger.error(f"Non-JSON API response: {response.status_code}")
283
- raise APIError(
284
- f"Invalid response format (status {response.status_code})",
285
- status_code=response.status_code,
286
- response=response,
287
- )
288
- except KeyError as e:
289
- logger.error(f"Invalid response: {response.status_code}")
286
+ except Exception as e:
287
+ logger.error(f"Invalid upload response: {response.status_code}")
290
288
  raise APIError(
291
- f"Invalid presigned S3 URL response: missing field {e}",
289
+ f"Invalid presigned S3 URL response: {e}",
292
290
  status_code=response.status_code,
293
291
  response=response,
294
292
  )
oagi/client/sync.py CHANGED
@@ -11,6 +11,13 @@ from functools import wraps
11
11
  import httpx
12
12
  from httpx import Response
13
13
 
14
+ from ..constants import (
15
+ API_HEALTH_ENDPOINT,
16
+ API_V1_FILE_UPLOAD_ENDPOINT,
17
+ API_V1_GENERATE_ENDPOINT,
18
+ API_V2_MESSAGE_ENDPOINT,
19
+ HTTP_CLIENT_TIMEOUT,
20
+ )
14
21
  from ..logging import get_logger
15
22
  from ..types import Image
16
23
  from ..types.models import GenerateResponse, LLMResponse, UploadFileResponse
@@ -46,7 +53,7 @@ class SyncClient(BaseClient[httpx.Client]):
46
53
  def __init__(self, base_url: str | None = None, api_key: str | None = None):
47
54
  super().__init__(base_url, api_key)
48
55
  self.client = httpx.Client(base_url=self.base_url)
49
- self.upload_client = httpx.Client(timeout=60) # client for uploading image
56
+ self.upload_client = httpx.Client(timeout=HTTP_CLIENT_TIMEOUT)
50
57
  logger.info(f"SyncClient initialized with base_url: {self.base_url}")
51
58
 
52
59
  def __enter__(self):
@@ -124,7 +131,10 @@ class SyncClient(BaseClient[httpx.Client]):
124
131
  # Make request
125
132
  try:
126
133
  response = self.client.post(
127
- "/v2/message", json=payload, headers=headers, timeout=self.timeout
134
+ API_V2_MESSAGE_ENDPOINT,
135
+ json=payload,
136
+ headers=headers,
137
+ timeout=self.timeout,
128
138
  )
129
139
  return self._process_response(response)
130
140
  except (httpx.TimeoutException, httpx.NetworkError) as e:
@@ -139,7 +149,7 @@ class SyncClient(BaseClient[httpx.Client]):
139
149
  """
140
150
  logger.debug("Making health check request")
141
151
  try:
142
- response = self.client.get("/health")
152
+ response = self.client.get(API_HEALTH_ENDPOINT)
143
153
  response.raise_for_status()
144
154
  result = response.json()
145
155
  logger.debug("Health check successful")
@@ -161,12 +171,12 @@ class SyncClient(BaseClient[httpx.Client]):
161
171
  Returns:
162
172
  UploadFileResponse: The response from /v1/file/upload with uuid and presigned S3 URL
163
173
  """
164
- logger.debug("Making API request to /v1/file/upload")
174
+ logger.debug(f"Making API request to {API_V1_FILE_UPLOAD_ENDPOINT}")
165
175
 
166
176
  try:
167
177
  headers = self._build_headers(api_version)
168
178
  response = self.client.get(
169
- "/v1/file/upload", headers=headers, timeout=self.timeout
179
+ API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
170
180
  )
171
181
  return self._process_upload_response(response)
172
182
  except (httpx.TimeoutException, httpx.NetworkError, httpx.HTTPStatusError) as e:
@@ -286,7 +296,10 @@ class SyncClient(BaseClient[httpx.Client]):
286
296
  # Make request
287
297
  try:
288
298
  response = self.client.post(
289
- "/v1/generate", json=payload, headers=headers, timeout=self.timeout
299
+ API_V1_GENERATE_ENDPOINT,
300
+ json=payload,
301
+ headers=headers,
302
+ timeout=self.timeout,
290
303
  )
291
304
  return self._process_generate_response(response)
292
305
  except (httpx.TimeoutException, httpx.NetworkError) as e:
oagi/constants.py ADDED
@@ -0,0 +1,43 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ # URLs & API Endpoints
10
+ DEFAULT_BASE_URL = "https://api.agiopen.org"
11
+ API_KEY_HELP_URL = "https://developer.agiopen.org/api-keys"
12
+ API_V2_MESSAGE_ENDPOINT = "/v2/message"
13
+ API_V1_FILE_UPLOAD_ENDPOINT = "/v1/file/upload"
14
+ API_V1_GENERATE_ENDPOINT = "/v1/generate"
15
+ API_HEALTH_ENDPOINT = "/health"
16
+
17
+ # Model identifiers
18
+ MODEL_ACTOR = "lux-actor-1"
19
+ MODEL_THINKER = "lux-thinker-1"
20
+
21
+ # Agent modes
22
+ MODE_ACTOR = "actor"
23
+ MODE_THINKER = "thinker"
24
+ MODE_TASKER = "tasker"
25
+
26
+ # Default max steps per model
27
+ DEFAULT_MAX_STEPS = 20
28
+ DEFAULT_MAX_STEPS_THINKER = 100
29
+ DEFAULT_MAX_STEPS_TASKER = 60
30
+
31
+ # Reflection intervals
32
+ DEFAULT_REFLECTION_INTERVAL = 4
33
+ DEFAULT_REFLECTION_INTERVAL_TASKER = 20
34
+
35
+ # Timing & Delays
36
+ DEFAULT_STEP_DELAY = 0.3
37
+
38
+ # Temperature Defaults
39
+ DEFAULT_TEMPERATURE = 0.5
40
+ DEFAULT_TEMPERATURE_LOW = 0.1
41
+
42
+ # Timeout Values
43
+ HTTP_CLIENT_TIMEOUT = 60
oagi/handler/__init__.py CHANGED
@@ -14,6 +14,21 @@ from oagi.handler.pyautogui_action_handler import (
14
14
  )
15
15
  from oagi.handler.screenshot_maker import ScreenshotMaker
16
16
 
17
+
18
def reset_handler(handler) -> None:
    """Reset handler state if the handler supports it.

    Uses duck-typing: the handler is reset only when it exposes a *callable*
    ``reset`` attribute. Handlers without such an attribute, or whose
    ``reset`` is a plain data attribute rather than a method, are left
    untouched instead of raising ``TypeError``.

    Args:
        handler: The action handler to reset.
    """
    reset = getattr(handler, "reset", None)
    # A bare hasattr() check would also accept non-callable attributes named
    # "reset" and then fail on the call; only invoke it when it is callable.
    if callable(reset):
        reset()
30
+
31
+
17
32
  __all__ = [
18
33
  "PILImage",
19
34
  "PyautoguiActionHandler",
@@ -21,4 +36,5 @@ __all__ = [
21
36
  "AsyncPyautoguiActionHandler",
22
37
  "ScreenshotMaker",
23
38
  "AsyncScreenshotMaker",
39
+ "reset_handler",
24
40
  ]
oagi/handler/_macos.py CHANGED
@@ -6,6 +6,15 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
+ """macOS-specific keyboard and mouse input handling.
10
+
11
+ This module provides:
12
+ - macos_click(): Fix for PyAutoGUI multi-click bug on macOS
13
+ - typewrite_exact(): Type text exactly, ignoring system capslock state
14
+ """
15
+
16
+ import time
17
+
9
18
  import pyautogui
10
19
 
11
20
  from ..exceptions import check_optional_dependency
@@ -13,6 +22,134 @@ from ..exceptions import check_optional_dependency
13
22
  check_optional_dependency("Quartz", "macOS multiple clicks", "desktop")
14
23
  import Quartz # noqa: E402
15
24
 
25
+ # macOS virtual key codes for typeable characters
26
+ KEYCODE_MAP = {
27
+ "a": 0x00,
28
+ "b": 0x0B,
29
+ "c": 0x08,
30
+ "d": 0x02,
31
+ "e": 0x0E,
32
+ "f": 0x03,
33
+ "g": 0x05,
34
+ "h": 0x04,
35
+ "i": 0x22,
36
+ "j": 0x26,
37
+ "k": 0x28,
38
+ "l": 0x25,
39
+ "m": 0x2E,
40
+ "n": 0x2D,
41
+ "o": 0x1F,
42
+ "p": 0x23,
43
+ "q": 0x0C,
44
+ "r": 0x0F,
45
+ "s": 0x01,
46
+ "t": 0x11,
47
+ "u": 0x20,
48
+ "v": 0x09,
49
+ "w": 0x0D,
50
+ "x": 0x07,
51
+ "y": 0x10,
52
+ "z": 0x06,
53
+ "1": 0x12,
54
+ "2": 0x13,
55
+ "3": 0x14,
56
+ "4": 0x15,
57
+ "5": 0x17,
58
+ "6": 0x16,
59
+ "7": 0x1A,
60
+ "8": 0x1C,
61
+ "9": 0x19,
62
+ "0": 0x1D,
63
+ " ": 0x31, # space
64
+ "-": 0x1B,
65
+ "=": 0x18,
66
+ "[": 0x21,
67
+ "]": 0x1E,
68
+ "\\": 0x2A,
69
+ ";": 0x29,
70
+ "'": 0x27,
71
+ "`": 0x32,
72
+ ",": 0x2B,
73
+ ".": 0x2F,
74
+ "/": 0x2C,
75
+ "\t": 0x30, # tab
76
+ "\n": 0x24, # return
77
+ }
78
+
79
+ # Characters that require shift key (on US keyboard layout)
80
+ SHIFT_CHARS = set('~!@#$%^&*()_+{}|:"<>?ABCDEFGHIJKLMNOPQRSTUVWXYZ')
81
+
82
+ # Mapping of shifted characters to their base key
83
+ SHIFT_KEY_MAP = {
84
+ "~": "`",
85
+ "!": "1",
86
+ "@": "2",
87
+ "#": "3",
88
+ "$": "4",
89
+ "%": "5",
90
+ "^": "6",
91
+ "&": "7",
92
+ "*": "8",
93
+ "(": "9",
94
+ ")": "0",
95
+ "_": "-",
96
+ "+": "=",
97
+ "{": "[",
98
+ "}": "]",
99
+ "|": "\\",
100
+ ":": ";",
101
+ '"': "'",
102
+ "<": ",",
103
+ ">": ".",
104
+ "?": "/",
105
+ }
106
+
107
+
108
def typewrite_exact(text: str, interval: float = 0.01) -> None:
    """Type text exactly as specified, ignoring system capslock state.

    Characters covered by ``KEYCODE_MAP`` (directly, or via ``SHIFT_KEY_MAP``
    / an ASCII capital listed in ``SHIFT_CHARS``) are posted as Quartz
    keyboard events whose modifier flags are set explicitly with
    ``CGEventSetFlags()``: shift when the character needs it, otherwise no
    flags at all.

    Characters that have no entry in ``KEYCODE_MAP`` (for example accented or
    non-Latin letters) are no longer dropped: they are posted as keyboard
    events carrying the character itself via
    ``CGEventKeyboardSetUnicodeString()``.

    Args:
        text: The text to type exactly as specified.
        interval: Time in seconds to sleep after each character
            (default: 0.01). No sleep happens when it is not positive.
    """
    for char in text:
        needs_shift = char in SHIFT_CHARS

        # Resolve the unshifted key. Only the ASCII capitals listed in
        # SHIFT_CHARS are folded to lower case; folding every str.isupper()
        # character would turn e.g. the Kelvin sign (U+212A) into a plain
        # "k" typed without shift.
        if char in SHIFT_KEY_MAP:
            base_char = SHIFT_KEY_MAP[char]
        elif needs_shift:
            base_char = char.lower()
        else:
            base_char = char

        keycode = KEYCODE_MAP.get(base_char)
        if keycode is None:
            # No virtual key known for this character: send it as a literal
            # Unicode string instead. Virtual key 0 is only a placeholder.
            # NOTE(review): assumes the attached Unicode string takes
            # precedence over the key code in receiving apps - confirm.
            virtual_key = 0
            flags = 0
            literal = char
            # The API expects the length in UTF-16 code units.
            literal_units = len(char.encode("utf-16-le", "surrogatepass")) // 2
        else:
            virtual_key = keycode
            flags = Quartz.kCGEventFlagMaskShift if needs_shift else 0
            literal = None
            literal_units = 0

        # Post key-down followed by key-up with identical flags.
        for key_down in (True, False):
            event = Quartz.CGEventCreateKeyboardEvent(None, virtual_key, key_down)
            Quartz.CGEventSetFlags(event, flags)
            if literal is not None:
                Quartz.CGEventKeyboardSetUnicodeString(event, literal_units, literal)
            Quartz.CGEventPost(Quartz.kCGHIDEventTap, event)

        if interval > 0:
            time.sleep(interval)
152
+
16
153
 
17
154
  def macos_click(x: int, y: int, clicks: int = 1) -> None:
18
155
  """
@@ -0,0 +1,101 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ """Windows-specific keyboard input handling.
10
+
11
+ This module provides typewrite_exact() which types text exactly as specified,
12
+ ignoring the system's capslock state by using SendInput with KEYEVENTF_UNICODE.
13
+ """
14
+
15
+ import ctypes
16
+ import time
17
+ from ctypes import wintypes
18
+
19
+ INPUT_KEYBOARD = 1
20
+ KEYEVENTF_UNICODE = 0x0004
21
+ KEYEVENTF_KEYUP = 0x0002
22
+
23
+
24
+ class KEYBDINPUT(ctypes.Structure):
25
+ _fields_ = [
26
+ ("wVk", wintypes.WORD),
27
+ ("wScan", wintypes.WORD),
28
+ ("dwFlags", wintypes.DWORD),
29
+ ("time", wintypes.DWORD),
30
+ ("dwExtraInfo", ctypes.POINTER(ctypes.c_ulong)),
31
+ ]
32
+
33
+
34
+ class MOUSEINPUT(ctypes.Structure):
35
+ _fields_ = [
36
+ ("dx", ctypes.c_long),
37
+ ("dy", ctypes.c_long),
38
+ ("mouseData", wintypes.DWORD),
39
+ ("dwFlags", wintypes.DWORD),
40
+ ("time", wintypes.DWORD),
41
+ ("dwExtraInfo", ctypes.POINTER(ctypes.c_ulong)),
42
+ ]
43
+
44
+
45
+ class HARDWAREINPUT(ctypes.Structure):
46
+ _fields_ = [
47
+ ("uMsg", wintypes.DWORD),
48
+ ("wParamL", wintypes.WORD),
49
+ ("wParamH", wintypes.WORD),
50
+ ]
51
+
52
+
53
+ class INPUT(ctypes.Structure):
54
+ class _I(ctypes.Union):
55
+ _fields_ = [
56
+ ("ki", KEYBDINPUT),
57
+ ("mi", MOUSEINPUT),
58
+ ("hi", HARDWAREINPUT),
59
+ ]
60
+
61
+ _anonymous_ = ("i",)
62
+ _fields_ = [
63
+ ("type", wintypes.DWORD),
64
+ ("i", _I),
65
+ ]
66
+
67
+
68
+ # Configure SendInput with proper argtypes for 64-bit compatibility
69
+ SendInput = ctypes.windll.user32.SendInput
70
+ SendInput.argtypes = [wintypes.UINT, ctypes.POINTER(INPUT), ctypes.c_int]
71
+ SendInput.restype = wintypes.UINT
72
+
73
+
74
def typewrite_exact(text: str, interval: float = 0.01) -> None:
    """Type text exactly using Unicode input - ignores capslock, keyboard layout, etc.

    Each character is sent through ``SendInput`` with ``KEYEVENTF_UNICODE``,
    so the value placed in ``wScan`` is the character itself rather than a
    key position.

    ``wScan`` is a 16-bit field, so a character outside the Basic
    Multilingual Plane (code point above 0xFFFF, e.g. most emoji) cannot be
    stored as a single ``ord()`` value without being truncated. Such a
    character is split into its UTF-16 surrogate pair and each code unit is
    sent as its own key-down/key-up pair, all within one ``SendInput`` call.

    Args:
        text: The text to type exactly as specified.
        interval: Time in seconds to sleep after each character
            (default: 0.01). No sleep happens when it is not positive.
    """
    for char in text:
        # One code unit for BMP characters, two (high + low surrogate)
        # otherwise. "surrogatepass" keeps a lone surrogate already present
        # in the str from raising during encoding.
        encoded = char.encode("utf-16-le", "surrogatepass")
        code_units = [
            int.from_bytes(encoded[offset : offset + 2], "little")
            for offset in range(0, len(encoded), 2)
        ]

        inputs = (INPUT * (2 * len(code_units)))()
        for index, unit in enumerate(code_units):
            key_down = inputs[2 * index]
            key_down.type = INPUT_KEYBOARD
            key_down.ki.wScan = unit
            key_down.ki.dwFlags = KEYEVENTF_UNICODE

            key_up = inputs[2 * index + 1]
            key_up.type = INPUT_KEYBOARD
            key_up.ki.wScan = unit
            key_up.ki.dwFlags = KEYEVENTF_UNICODE | KEYEVENTF_KEYUP

        SendInput(len(inputs), inputs, ctypes.sizeof(INPUT))

        if interval > 0:
            time.sleep(interval)
@@ -29,6 +29,14 @@ class AsyncPyautoguiActionHandler:
29
29
  self.sync_handler = PyautoguiActionHandler(config=config)
30
30
  self.config = config or PyautoguiConfig()
31
31
 
32
+ def reset(self):
33
+ """Reset handler state.
34
+
35
+ Delegates to the underlying synchronous handler's reset method.
36
+ Called at automation start/end and when FINISH action is received.
37
+ """
38
+ self.sync_handler.reset()
39
+
32
40
  async def __call__(self, actions: list[Action]) -> None:
33
41
  """
34
42
  Execute actions asynchronously using a thread pool executor.
@@ -0,0 +1,55 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+
10
class CapsLockManager:
    """Track a caps lock flag used to transform text before typing.

    In "session" mode the flag lives inside this object and is flipped by
    toggle(); transform_text() then upper-cases letters while it is on. In
    any other mode the flag is never changed and text is returned as-is.
    """

    def __init__(self, mode: str = "session"):
        """Create a manager with caps lock off.

        Args:
            mode: Either "session" (internal state) or "system" (OS-level)
        """
        self.caps_enabled = False
        self.mode = mode

    def reset(self):
        """Switch the tracked caps lock flag back off."""
        self.caps_enabled = False

    def toggle(self):
        """Flip the tracked caps lock flag; does nothing outside session mode."""
        if self.mode != "session":
            return
        self.caps_enabled = not self.caps_enabled

    def transform_text(self, text: str) -> str:
        """Apply the tracked caps lock flag to text.

        Args:
            text: Input text to transform

        Returns:
            The text with every alphabetic character upper-cased when the
            manager is in session mode with caps lock on; otherwise the text
            unchanged.
        """
        if self.mode != "session" or not self.caps_enabled:
            return text

        pieces = []
        for ch in text:
            # Only alphabetic characters are affected; digits, punctuation
            # and whitespace pass through untouched.
            pieces.append(ch.upper() if ch.isalpha() else ch)
        return "".join(pieces)

    def should_use_system_capslock(self) -> bool:
        """Return True when the manager is configured for "system" mode."""
        return self.mode == "system"
@@ -13,48 +13,15 @@ from pydantic import BaseModel, Field
13
13
 
14
14
  from ..exceptions import check_optional_dependency
15
15
  from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
16
+ from .capslock_manager import CapsLockManager
16
17
 
17
18
  check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
18
19
  import pyautogui # noqa: E402
19
20
 
20
21
  if sys.platform == "darwin":
21
22
  from . import _macos
22
-
23
-
24
- class CapsLockManager:
25
- """Manages caps lock state for text transformation."""
26
-
27
- def __init__(self, mode: str = "session"):
28
- """Initialize caps lock manager.
29
-
30
- Args:
31
- mode: Either "session" (internal state) or "system" (OS-level)
32
- """
33
- self.mode = mode
34
- self.caps_enabled = False
35
-
36
- def toggle(self):
37
- """Toggle caps lock state in session mode."""
38
- if self.mode == "session":
39
- self.caps_enabled = not self.caps_enabled
40
-
41
- def transform_text(self, text: str) -> str:
42
- """Transform text based on caps lock state.
43
-
44
- Args:
45
- text: Input text to transform
46
-
47
- Returns:
48
- Transformed text (uppercase if caps enabled in session mode)
49
- """
50
- if self.mode == "session" and self.caps_enabled:
51
- # Transform letters to uppercase, preserve special characters
52
- return "".join(c.upper() if c.isalpha() else c for c in text)
53
- return text
54
-
55
- def should_use_system_capslock(self) -> bool:
56
- """Check if system-level caps lock should be used."""
57
- return self.mode == "system"
23
+ elif sys.platform == "win32":
24
+ from . import _windows
58
25
 
59
26
 
60
27
  class PyautoguiConfig(BaseModel):
@@ -64,7 +31,8 @@ class PyautoguiConfig(BaseModel):
64
31
  default=0.5, description="Duration for drag operations in seconds"
65
32
  )
66
33
  scroll_amount: int = Field(
67
- default=30, description="Amount to scroll (positive for up, negative for down)"
34
+ default=2 if sys.platform == "darwin" else 100,
35
+ description="Amount to scroll (positive for up, negative for down)",
68
36
  )
69
37
  wait_duration: float = Field(
70
38
  default=1.0, description="Duration for wait actions in seconds"
@@ -110,6 +78,14 @@ class PyautoguiActionHandler:
110
78
  # Initialize caps lock manager
111
79
  self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
112
80
 
81
+ def reset(self):
82
+ """Reset handler state.
83
+
84
+ Called at automation start/end and when FINISH action is received.
85
+ Resets the internal capslock state.
86
+ """
87
+ self.caps_manager.reset()
88
+
113
89
  def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
114
90
  """Convert coordinates from 0-1000 range to actual screen coordinates.
115
91
 
@@ -237,7 +213,14 @@ class PyautoguiActionHandler:
237
213
  text = arg.strip("\"'")
238
214
  # Apply caps lock transformation if needed
239
215
  text = self.caps_manager.transform_text(text)
240
- pyautogui.typewrite(text)
216
+ # Use platform-specific typing that ignores system capslock
217
+ if sys.platform == "darwin":
218
+ _macos.typewrite_exact(text)
219
+ elif sys.platform == "win32":
220
+ _windows.typewrite_exact(text)
221
+ else:
222
+ # Fallback for other platforms
223
+ pyautogui.typewrite(text)
241
224
 
242
225
  case ActionType.SCROLL:
243
226
  x, y, direction = self._parse_scroll(arg)
@@ -250,8 +233,8 @@ class PyautoguiActionHandler:
250
233
  pyautogui.scroll(scroll_amount)
251
234
 
252
235
  case ActionType.FINISH:
253
- # Task completion - no action needed
254
- pass
236
+ # Task completion - reset handler state
237
+ self.reset()
255
238
 
256
239
  case ActionType.WAIT:
257
240
  # Wait for a short period
oagi/server/config.py CHANGED
@@ -8,6 +8,7 @@
8
8
 
9
9
  from pydantic import Field
10
10
 
11
+ from ..constants import DEFAULT_MAX_STEPS, MODEL_ACTOR
11
12
  from ..exceptions import check_optional_dependency
12
13
 
13
14
  check_optional_dependency("pydantic_settings", "Server features", "server")
@@ -20,7 +21,7 @@ class ServerConfig(BaseSettings):
20
21
  oagi_base_url: str = Field(default="https://api.agiopen.org", alias="OAGI_BASE_URL")
21
22
 
22
23
  # Server settings
23
- server_host: str = Field(default="0.0.0.0", alias="OAGI_SERVER_HOST")
24
+ server_host: str = Field(default="127.0.0.1", alias="OAGI_SERVER_HOST")
24
25
  server_port: int = Field(default=8000, alias="OAGI_SERVER_PORT")
25
26
  cors_allowed_origins: str = Field(default="*", alias="OAGI_CORS_ORIGINS")
26
27
 
@@ -28,11 +29,13 @@ class ServerConfig(BaseSettings):
28
29
  session_timeout_seconds: float = Field(default=10.0)
29
30
 
30
31
  # Model settings
31
- default_model: str = Field(default="lux-actor-1", alias="OAGI_DEFAULT_MODEL")
32
+ default_model: str = Field(default=MODEL_ACTOR, alias="OAGI_DEFAULT_MODEL")
32
33
  default_temperature: float = Field(default=0.5, ge=0.0, le=2.0)
33
34
 
34
35
  # Agent settings
35
- max_steps: int = Field(default=20, alias="OAGI_MAX_STEPS", ge=1, le=100)
36
+ max_steps: int = Field(
37
+ default=DEFAULT_MAX_STEPS, alias="OAGI_MAX_STEPS", ge=1, le=200
38
+ )
36
39
 
37
40
  # Socket.IO settings
38
41
  socketio_path: str = Field(default="/socket.io")
oagi/server/models.py CHANGED
@@ -10,13 +10,15 @@ from typing import Literal
10
10
 
11
11
  from pydantic import BaseModel, Field
12
12
 
13
+ from ..constants import DEFAULT_TEMPERATURE_LOW, MODE_ACTOR, MODEL_ACTOR
14
+
13
15
 
14
16
  # Client-to-server events
15
17
  class InitEventData(BaseModel):
16
18
  instruction: str = Field(...)
17
- mode: str | None = Field(default="actor")
18
- model: str | None = Field(default="lux-actor-1")
19
- temperature: float | None = Field(default=0.1, ge=0.0, le=2.0)
19
+ mode: str | None = Field(default=MODE_ACTOR)
20
+ model: str | None = Field(default=MODEL_ACTOR)
21
+ temperature: float | None = Field(default=DEFAULT_TEMPERATURE_LOW, ge=0.0, le=2.0)
20
22
 
21
23
 
22
24
  # Server-to-client events