oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +76 -33
- oagi/agent/__init__.py +2 -0
- oagi/agent/default.py +45 -12
- oagi/agent/factories.py +22 -3
- oagi/agent/observer/__init__.py +38 -0
- oagi/agent/observer/agent_observer.py +99 -0
- oagi/agent/observer/events.py +28 -0
- oagi/agent/observer/exporters.py +445 -0
- oagi/agent/observer/protocol.py +12 -0
- oagi/agent/registry.py +2 -2
- oagi/agent/tasker/models.py +1 -0
- oagi/agent/tasker/planner.py +41 -9
- oagi/agent/tasker/taskee_agent.py +178 -86
- oagi/agent/tasker/tasker_agent.py +25 -14
- oagi/cli/agent.py +50 -9
- oagi/cli/tracking.py +27 -17
- oagi/cli/utils.py +11 -4
- oagi/client/base.py +3 -7
- oagi/handler/_macos.py +55 -0
- oagi/handler/pyautogui_action_handler.py +19 -2
- oagi/server/agent_wrappers.py +5 -5
- oagi/server/config.py +3 -3
- oagi/server/models.py +2 -2
- oagi/server/session_store.py +2 -2
- oagi/server/socketio_server.py +1 -1
- oagi/task/async_.py +13 -34
- oagi/task/async_short.py +2 -2
- oagi/task/base.py +41 -7
- oagi/task/short.py +2 -2
- oagi/task/sync.py +11 -34
- oagi/types/__init__.py +24 -4
- oagi/types/async_image_provider.py +3 -2
- oagi/types/image_provider.py +3 -2
- oagi/types/step_observer.py +75 -16
- oagi/types/url.py +3 -0
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/METADATA +38 -25
- oagi_core-0.10.0.dist-info/RECORD +68 -0
- oagi/types/url_image.py +0 -47
- oagi_core-0.9.1.dist-info/RECORD +0 -62
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/licenses/LICENSE +0 -0
oagi/client/base.py
CHANGED
|
@@ -41,16 +41,12 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
41
41
|
|
|
42
42
|
def __init__(self, base_url: str | None = None, api_key: str | None = None):
|
|
43
43
|
# Get from environment if not provided
|
|
44
|
-
self.base_url =
|
|
44
|
+
self.base_url = (
|
|
45
|
+
base_url or os.getenv("OAGI_BASE_URL") or "https://api.agiopen.org"
|
|
46
|
+
)
|
|
45
47
|
self.api_key = api_key or os.getenv("OAGI_API_KEY")
|
|
46
48
|
|
|
47
49
|
# Validate required configuration
|
|
48
|
-
if not self.base_url:
|
|
49
|
-
raise ConfigurationError(
|
|
50
|
-
"OAGI base URL must be provided either as 'base_url' parameter or "
|
|
51
|
-
"OAGI_BASE_URL environment variable"
|
|
52
|
-
)
|
|
53
|
-
|
|
54
50
|
if not self.api_key:
|
|
55
51
|
raise ConfigurationError(
|
|
56
52
|
"OAGI API key must be provided either as 'api_key' parameter or "
|
oagi/handler/_macos.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import pyautogui
|
|
10
|
+
|
|
11
|
+
from ..exceptions import check_optional_dependency
|
|
12
|
+
|
|
13
|
+
check_optional_dependency("Quartz", "macOS multiple clicks", "desktop")
|
|
14
|
+
import Quartz # noqa: E402
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def macos_click(x: int, y: int, clicks: int = 1) -> None:
|
|
18
|
+
"""
|
|
19
|
+
Execute a mouse click sequence on macOS with correct click state.
|
|
20
|
+
|
|
21
|
+
This avoids the PyAutoGUI bug where multi-clicks are sent as separate
|
|
22
|
+
single clicks (clickState=1), which macOS interprets as distinct events
|
|
23
|
+
rather than double/triple clicks.
|
|
24
|
+
|
|
25
|
+
Check https://github.com/asweigart/pyautogui/issues/672
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
x: X coordinate
|
|
29
|
+
y: Y coordinate
|
|
30
|
+
clicks: Number of clicks (1=single, 2=double, 3=triple)
|
|
31
|
+
"""
|
|
32
|
+
# Move to position first using pyautogui to ensure consistency
|
|
33
|
+
pyautogui.moveTo(x, y)
|
|
34
|
+
|
|
35
|
+
point = Quartz.CGPoint(x=x, y=y)
|
|
36
|
+
|
|
37
|
+
# Create and post events for each click in the sequence
|
|
38
|
+
for i in range(1, clicks + 1):
|
|
39
|
+
# Create Down/Up events
|
|
40
|
+
mouse_down = Quartz.CGEventCreateMouseEvent(
|
|
41
|
+
None, Quartz.kCGEventLeftMouseDown, point, Quartz.kCGMouseButtonLeft
|
|
42
|
+
)
|
|
43
|
+
mouse_up = Quartz.CGEventCreateMouseEvent(
|
|
44
|
+
None, Quartz.kCGEventLeftMouseUp, point, Quartz.kCGMouseButtonLeft
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Set the click state (1 for first click, 2 for second, etc.)
|
|
48
|
+
Quartz.CGEventSetIntegerValueField(
|
|
49
|
+
mouse_down, Quartz.kCGMouseEventClickState, i
|
|
50
|
+
)
|
|
51
|
+
Quartz.CGEventSetIntegerValueField(mouse_up, Quartz.kCGMouseEventClickState, i)
|
|
52
|
+
|
|
53
|
+
# Post events
|
|
54
|
+
Quartz.CGEventPost(Quartz.kCGHIDEventTap, mouse_down)
|
|
55
|
+
Quartz.CGEventPost(Quartz.kCGHIDEventTap, mouse_up)
|
oagi/handler/pyautogui_action_handler.py
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
9
|
import re
|
|
10
|
+
import sys
|
|
10
11
|
import time
|
|
11
12
|
|
|
12
13
|
from pydantic import BaseModel, Field
|
|
@@ -17,6 +18,9 @@ from ..types import Action, ActionType
|
|
|
17
18
|
check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
|
|
18
19
|
import pyautogui # noqa: E402
|
|
19
20
|
|
|
21
|
+
if sys.platform == "darwin":
|
|
22
|
+
from . import _macos
|
|
23
|
+
|
|
20
24
|
|
|
21
25
|
class CapsLockManager:
|
|
22
26
|
"""Manages caps lock state for text transformation."""
|
|
@@ -76,6 +80,10 @@ class PyautoguiConfig(BaseModel):
|
|
|
76
80
|
default="session",
|
|
77
81
|
description="Caps lock handling mode: 'session' (internal state) or 'system' (OS-level)",
|
|
78
82
|
)
|
|
83
|
+
macos_ctrl_to_cmd: bool = Field(
|
|
84
|
+
default=True,
|
|
85
|
+
description="Replace 'ctrl' with 'command' in hotkey combinations on macOS",
|
|
86
|
+
)
|
|
79
87
|
|
|
80
88
|
|
|
81
89
|
class PyautoguiActionHandler:
|
|
@@ -165,6 +173,9 @@ class PyautoguiActionHandler:
|
|
|
165
173
|
# Normalize caps lock variations
|
|
166
174
|
if key in ["caps_lock", "caps", "capslock"]:
|
|
167
175
|
return "capslock"
|
|
176
|
+
# Remap ctrl to command on macOS if enabled
|
|
177
|
+
if self.config.macos_ctrl_to_cmd and sys.platform == "darwin" and key == "ctrl":
|
|
178
|
+
return "command"
|
|
168
179
|
return key
|
|
169
180
|
|
|
170
181
|
def _parse_hotkey(self, args_str: str) -> list[str]:
|
|
@@ -186,11 +197,17 @@ class PyautoguiActionHandler:
|
|
|
186
197
|
|
|
187
198
|
case ActionType.LEFT_DOUBLE:
|
|
188
199
|
x, y = self._parse_coords(arg)
|
|
189
|
-
|
|
200
|
+
if sys.platform == "darwin":
|
|
201
|
+
_macos.macos_click(x, y, clicks=2)
|
|
202
|
+
else:
|
|
203
|
+
pyautogui.doubleClick(x, y)
|
|
190
204
|
|
|
191
205
|
case ActionType.LEFT_TRIPLE:
|
|
192
206
|
x, y = self._parse_coords(arg)
|
|
193
|
-
|
|
207
|
+
if sys.platform == "darwin":
|
|
208
|
+
_macos.macos_click(x, y, clicks=3)
|
|
209
|
+
else:
|
|
210
|
+
pyautogui.tripleClick(x, y)
|
|
194
211
|
|
|
195
212
|
case ActionType.RIGHT_SINGLE:
|
|
196
213
|
x, y = self._parse_coords(arg)
|
oagi/server/agent_wrappers.py
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import logging
|
|
10
10
|
from typing import TYPE_CHECKING
|
|
11
11
|
|
|
12
|
-
from ..types import
|
|
12
|
+
from ..types import URL
|
|
13
13
|
from ..types.models.action import Action
|
|
14
14
|
from .models import ScreenshotRequestData, ScreenshotResponseData
|
|
15
15
|
|
|
@@ -56,7 +56,7 @@ class SocketIOImageProvider:
|
|
|
56
56
|
self.oagi_client = oagi_client
|
|
57
57
|
self._last_url: str | None = None
|
|
58
58
|
|
|
59
|
-
async def __call__(self) ->
|
|
59
|
+
async def __call__(self) -> URL:
|
|
60
60
|
logger.debug("Requesting screenshot via Socket.IO")
|
|
61
61
|
|
|
62
62
|
# Get S3 presigned URL from OAGI
|
|
@@ -87,12 +87,12 @@ class SocketIOImageProvider:
|
|
|
87
87
|
self.session.current_screenshot_url = upload_response.download_url
|
|
88
88
|
|
|
89
89
|
logger.debug(f"Screenshot captured successfully: {upload_response.uuid}")
|
|
90
|
-
return
|
|
90
|
+
return URL(upload_response.download_url)
|
|
91
91
|
|
|
92
|
-
async def last_image(self) ->
|
|
92
|
+
async def last_image(self) -> URL:
|
|
93
93
|
if self._last_url:
|
|
94
94
|
logger.debug("Returning last captured screenshot")
|
|
95
|
-
return
|
|
95
|
+
return URL(self._last_url)
|
|
96
96
|
|
|
97
97
|
logger.debug("No previous screenshot, capturing new one")
|
|
98
98
|
return await self()
|
oagi/server/config.py
CHANGED
|
@@ -28,11 +28,11 @@ class ServerConfig(BaseSettings):
|
|
|
28
28
|
session_timeout_seconds: float = Field(default=10.0)
|
|
29
29
|
|
|
30
30
|
# Model settings
|
|
31
|
-
default_model: str = Field(default="lux-
|
|
32
|
-
default_temperature: float = Field(default=0.
|
|
31
|
+
default_model: str = Field(default="lux-actor-1", alias="OAGI_DEFAULT_MODEL")
|
|
32
|
+
default_temperature: float = Field(default=0.5, ge=0.0, le=2.0)
|
|
33
33
|
|
|
34
34
|
# Agent settings
|
|
35
|
-
max_steps: int = Field(default=
|
|
35
|
+
max_steps: int = Field(default=20, alias="OAGI_MAX_STEPS", ge=1, le=100)
|
|
36
36
|
|
|
37
37
|
# Socket.IO settings
|
|
38
38
|
socketio_path: str = Field(default="/socket.io")
|
oagi/server/models.py
CHANGED
|
@@ -15,7 +15,7 @@ from pydantic import BaseModel, Field
|
|
|
15
15
|
class InitEventData(BaseModel):
|
|
16
16
|
instruction: str = Field(...)
|
|
17
17
|
mode: str | None = Field(default="actor")
|
|
18
|
-
model: str | None = Field(default="lux-
|
|
18
|
+
model: str | None = Field(default="lux-actor-1")
|
|
19
19
|
temperature: float | None = Field(default=0.1, ge=0.0, le=2.0)
|
|
20
20
|
|
|
21
21
|
|
|
@@ -75,7 +75,7 @@ class ScreenshotResponseData(BaseModel):
|
|
|
75
75
|
|
|
76
76
|
# Action acknowledgement
|
|
77
77
|
class ActionAckData(BaseModel):
|
|
78
|
-
|
|
78
|
+
index: int = Field(...)
|
|
79
79
|
success: bool = Field(...)
|
|
80
80
|
error: str | None = Field(None)
|
|
81
81
|
execution_time_ms: int | None = Field(None)
|
oagi/server/session_store.py
CHANGED
|
@@ -18,7 +18,7 @@ class Session:
|
|
|
18
18
|
session_id: str,
|
|
19
19
|
instruction: str,
|
|
20
20
|
mode: str = "actor",
|
|
21
|
-
model: str = "lux-
|
|
21
|
+
model: str = "lux-actor-1",
|
|
22
22
|
temperature: float = 0.0,
|
|
23
23
|
):
|
|
24
24
|
self.session_id: str = session_id
|
|
@@ -54,7 +54,7 @@ class SessionStore:
|
|
|
54
54
|
self,
|
|
55
55
|
instruction: str,
|
|
56
56
|
mode: str = "actor",
|
|
57
|
-
model: str = "lux-
|
|
57
|
+
model: str = "lux-actor-1",
|
|
58
58
|
temperature: float = 0.0,
|
|
59
59
|
session_id: str | None = None,
|
|
60
60
|
) -> str:
|
oagi/server/socketio_server.py
CHANGED
|
@@ -224,7 +224,7 @@ class SessionNamespace(socketio.AsyncNamespace):
|
|
|
224
224
|
# Emit finish event
|
|
225
225
|
await self.call(
|
|
226
226
|
"finish",
|
|
227
|
-
FinishEventData(
|
|
227
|
+
FinishEventData(index=0, total=1).model_dump(),
|
|
228
228
|
to=session.socket_id,
|
|
229
229
|
timeout=self.config.socketio_timeout,
|
|
230
230
|
)
|
oagi/task/async_.py
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
import warnings
|
|
10
10
|
|
|
11
11
|
from ..client import AsyncClient
|
|
12
|
-
from ..
|
|
13
|
-
from
|
|
14
|
-
from .base import BaseTask
|
|
12
|
+
from ..types import URL, Image, Step
|
|
13
|
+
from .base import BaseActor
|
|
15
14
|
|
|
16
|
-
logger = get_logger("async_task")
|
|
17
15
|
|
|
18
|
-
|
|
19
|
-
class AsyncActor(BaseTask):
|
|
16
|
+
class AsyncActor(BaseActor):
|
|
20
17
|
"""Async base class for task automation with the OAGI API."""
|
|
21
18
|
|
|
22
19
|
def __init__(
|
|
23
20
|
self,
|
|
24
21
|
api_key: str | None = None,
|
|
25
22
|
base_url: str | None = None,
|
|
26
|
-
model: str = "
|
|
23
|
+
model: str = "lux-actor-1",
|
|
27
24
|
temperature: float | None = None,
|
|
28
25
|
):
|
|
29
26
|
super().__init__(api_key, base_url, model, temperature)
|
|
@@ -34,19 +31,19 @@ class AsyncActor(BaseTask):
|
|
|
34
31
|
async def init_task(
|
|
35
32
|
self,
|
|
36
33
|
task_desc: str,
|
|
37
|
-
max_steps: int =
|
|
34
|
+
max_steps: int = 20,
|
|
38
35
|
):
|
|
39
36
|
"""Initialize a new task with the given description.
|
|
40
37
|
|
|
41
38
|
Args:
|
|
42
39
|
task_desc: Task description
|
|
43
|
-
max_steps: Maximum number of steps
|
|
40
|
+
max_steps: Maximum number of steps allowed
|
|
44
41
|
"""
|
|
45
42
|
self._prepare_init_task(task_desc, max_steps)
|
|
46
43
|
|
|
47
44
|
async def step(
|
|
48
45
|
self,
|
|
49
|
-
screenshot: Image | bytes,
|
|
46
|
+
screenshot: Image | URL | bytes,
|
|
50
47
|
instruction: str | None = None,
|
|
51
48
|
temperature: float | None = None,
|
|
52
49
|
) -> Step:
|
|
@@ -60,33 +57,15 @@ class AsyncActor(BaseTask):
|
|
|
60
57
|
Returns:
|
|
61
58
|
Step: The actions and reasoning for this step
|
|
62
59
|
"""
|
|
63
|
-
self.
|
|
64
|
-
|
|
60
|
+
kwargs = self._prepare_step(
|
|
61
|
+
screenshot, instruction, temperature, prefix="async "
|
|
62
|
+
)
|
|
65
63
|
|
|
66
64
|
try:
|
|
67
|
-
|
|
68
|
-
temp = self._get_temperature(temperature)
|
|
69
|
-
|
|
70
|
-
# Prepare screenshot kwargs (handles URLImage vs bytes/Image)
|
|
71
|
-
screenshot_kwargs = self._prepare_screenshot_kwargs(screenshot)
|
|
72
|
-
|
|
73
|
-
# Call API with dynamically determined screenshot argument
|
|
74
|
-
response = await self.client.create_message(
|
|
75
|
-
model=self.model,
|
|
76
|
-
task_description=self.task_description,
|
|
77
|
-
task_id=self.task_id,
|
|
78
|
-
instruction=instruction,
|
|
79
|
-
messages_history=self.message_history,
|
|
80
|
-
temperature=temp,
|
|
81
|
-
**screenshot_kwargs,
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
# Convert API response to Step (also updates message_history)
|
|
65
|
+
response = await self.client.create_message(**kwargs)
|
|
85
66
|
return self._build_step_response(response, prefix="Async ")
|
|
86
|
-
|
|
87
67
|
except Exception as e:
|
|
88
|
-
|
|
89
|
-
raise
|
|
68
|
+
self._handle_step_error(e, prefix="async ")
|
|
90
69
|
|
|
91
70
|
async def close(self):
|
|
92
71
|
"""Close the underlying HTTP client to free resources."""
|
|
@@ -110,7 +89,7 @@ class AsyncTask(AsyncActor):
|
|
|
110
89
|
self,
|
|
111
90
|
api_key: str | None = None,
|
|
112
91
|
base_url: str | None = None,
|
|
113
|
-
model: str = "
|
|
92
|
+
model: str = "lux-actor-1",
|
|
114
93
|
temperature: float | None = None,
|
|
115
94
|
):
|
|
116
95
|
warnings.warn(
|
oagi/task/async_short.py
CHANGED
|
@@ -27,7 +27,7 @@ class AsyncShortTask(AsyncActor, BaseAutoMode):
|
|
|
27
27
|
self,
|
|
28
28
|
api_key: str | None = None,
|
|
29
29
|
base_url: str | None = None,
|
|
30
|
-
model: str = "
|
|
30
|
+
model: str = "lux-actor-1",
|
|
31
31
|
temperature: float | None = None,
|
|
32
32
|
):
|
|
33
33
|
warnings.warn(
|
|
@@ -43,7 +43,7 @@ class AsyncShortTask(AsyncActor, BaseAutoMode):
|
|
|
43
43
|
async def auto_mode(
|
|
44
44
|
self,
|
|
45
45
|
task_desc: str,
|
|
46
|
-
max_steps: int =
|
|
46
|
+
max_steps: int = 20,
|
|
47
47
|
executor: AsyncActionHandler = None,
|
|
48
48
|
image_provider: AsyncImageProvider = None,
|
|
49
49
|
temperature: float | None = None,
|
oagi/task/base.py
CHANGED
|
@@ -9,14 +9,14 @@
|
|
|
9
9
|
from uuid import uuid4
|
|
10
10
|
|
|
11
11
|
from ..logging import get_logger
|
|
12
|
-
from ..types import Image, Step
|
|
12
|
+
from ..types import URL, Image, Step
|
|
13
13
|
from ..types.models import LLMResponse
|
|
14
14
|
|
|
15
15
|
logger = get_logger("task.base")
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
class
|
|
19
|
-
"""Base class with shared task management logic for sync/async
|
|
18
|
+
class BaseActor:
|
|
19
|
+
"""Base class with shared task management logic for sync/async actors."""
|
|
20
20
|
|
|
21
21
|
def __init__(
|
|
22
22
|
self,
|
|
@@ -30,6 +30,8 @@ class BaseTask:
|
|
|
30
30
|
self.model = model
|
|
31
31
|
self.temperature = temperature
|
|
32
32
|
self.message_history: list = [] # OpenAI-compatible message history
|
|
33
|
+
self.max_steps: int = 20 # Maximum steps allowed
|
|
34
|
+
self.current_step: int = 0 # Current step counter
|
|
33
35
|
# Client will be set by subclasses
|
|
34
36
|
self.api_key: str | None = None
|
|
35
37
|
self.base_url: str | None = None
|
|
@@ -48,11 +50,43 @@ class BaseTask:
|
|
|
48
50
|
self.task_id = uuid4().hex
|
|
49
51
|
self.task_description = task_desc
|
|
50
52
|
self.message_history = []
|
|
53
|
+
self.max_steps = max_steps
|
|
54
|
+
self.current_step = 0
|
|
51
55
|
logger.info(f"Task initialized: '{task_desc}' (max_steps: {max_steps})")
|
|
52
56
|
|
|
53
|
-
def
|
|
57
|
+
def _validate_and_increment_step(self):
|
|
54
58
|
if not self.task_description:
|
|
55
59
|
raise ValueError("Task description must be set. Call init_task() first.")
|
|
60
|
+
if self.current_step >= self.max_steps:
|
|
61
|
+
raise ValueError(
|
|
62
|
+
f"Max steps limit ({self.max_steps}) reached. "
|
|
63
|
+
"Call init_task() to start a new task."
|
|
64
|
+
)
|
|
65
|
+
self.current_step += 1
|
|
66
|
+
|
|
67
|
+
def _prepare_step(
|
|
68
|
+
self,
|
|
69
|
+
screenshot: Image | URL | bytes,
|
|
70
|
+
instruction: str | None,
|
|
71
|
+
temperature: float | None,
|
|
72
|
+
prefix: str = "",
|
|
73
|
+
) -> dict:
|
|
74
|
+
self._validate_and_increment_step()
|
|
75
|
+
self._log_step_execution(prefix=prefix)
|
|
76
|
+
|
|
77
|
+
return {
|
|
78
|
+
"model": self.model,
|
|
79
|
+
"task_description": self.task_description,
|
|
80
|
+
"task_id": self.task_id,
|
|
81
|
+
"instruction": instruction,
|
|
82
|
+
"messages_history": self.message_history,
|
|
83
|
+
"temperature": self._get_temperature(temperature),
|
|
84
|
+
**self._prepare_screenshot_kwargs(screenshot),
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
def _handle_step_error(self, error: Exception, prefix: str = ""):
|
|
88
|
+
logger.error(f"Error during {prefix}step execution: {error}")
|
|
89
|
+
raise
|
|
56
90
|
|
|
57
91
|
def _prepare_screenshot(self, screenshot: Image | bytes) -> bytes:
|
|
58
92
|
if isinstance(screenshot, Image):
|
|
@@ -62,9 +96,9 @@ class BaseTask:
|
|
|
62
96
|
def _get_temperature(self, temperature: float | None) -> float | None:
|
|
63
97
|
return temperature if temperature is not None else self.temperature
|
|
64
98
|
|
|
65
|
-
def _prepare_screenshot_kwargs(self, screenshot: Image | bytes) -> dict:
|
|
66
|
-
if isinstance(screenshot,
|
|
67
|
-
return {"screenshot_url": screenshot
|
|
99
|
+
def _prepare_screenshot_kwargs(self, screenshot: Image | URL | bytes) -> dict:
|
|
100
|
+
if isinstance(screenshot, str):
|
|
101
|
+
return {"screenshot_url": screenshot}
|
|
68
102
|
return {"screenshot": self._prepare_screenshot(screenshot)}
|
|
69
103
|
|
|
70
104
|
def _handle_response_message_history(self, response: LLMResponse):
|
oagi/task/short.py
CHANGED
|
@@ -27,7 +27,7 @@ class ShortTask(Actor, BaseAutoMode):
|
|
|
27
27
|
self,
|
|
28
28
|
api_key: str | None = None,
|
|
29
29
|
base_url: str | None = None,
|
|
30
|
-
model: str = "
|
|
30
|
+
model: str = "lux-actor-1",
|
|
31
31
|
temperature: float | None = None,
|
|
32
32
|
):
|
|
33
33
|
warnings.warn(
|
|
@@ -43,7 +43,7 @@ class ShortTask(Actor, BaseAutoMode):
|
|
|
43
43
|
def auto_mode(
|
|
44
44
|
self,
|
|
45
45
|
task_desc: str,
|
|
46
|
-
max_steps: int =
|
|
46
|
+
max_steps: int = 20,
|
|
47
47
|
executor: ActionHandler = None,
|
|
48
48
|
image_provider: ImageProvider = None,
|
|
49
49
|
temperature: float | None = None,
|
oagi/task/sync.py
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
import warnings
|
|
10
10
|
|
|
11
11
|
from ..client import SyncClient
|
|
12
|
-
from ..
|
|
13
|
-
from
|
|
14
|
-
from .base import BaseTask
|
|
12
|
+
from ..types import URL, Image, Step
|
|
13
|
+
from .base import BaseActor
|
|
15
14
|
|
|
16
|
-
logger = get_logger("task")
|
|
17
15
|
|
|
18
|
-
|
|
19
|
-
class Actor(BaseTask):
|
|
16
|
+
class Actor(BaseActor):
|
|
20
17
|
"""Base class for task automation with the OAGI API."""
|
|
21
18
|
|
|
22
19
|
def __init__(
|
|
23
20
|
self,
|
|
24
21
|
api_key: str | None = None,
|
|
25
22
|
base_url: str | None = None,
|
|
26
|
-
model: str = "
|
|
23
|
+
model: str = "lux-actor-1",
|
|
27
24
|
temperature: float | None = None,
|
|
28
25
|
):
|
|
29
26
|
super().__init__(api_key, base_url, model, temperature)
|
|
@@ -34,19 +31,19 @@ class Actor(BaseTask):
|
|
|
34
31
|
def init_task(
|
|
35
32
|
self,
|
|
36
33
|
task_desc: str,
|
|
37
|
-
max_steps: int =
|
|
34
|
+
max_steps: int = 20,
|
|
38
35
|
):
|
|
39
36
|
"""Initialize a new task with the given description.
|
|
40
37
|
|
|
41
38
|
Args:
|
|
42
39
|
task_desc: Task description
|
|
43
|
-
max_steps: Maximum number of steps
|
|
40
|
+
max_steps: Maximum number of steps allowed
|
|
44
41
|
"""
|
|
45
42
|
self._prepare_init_task(task_desc, max_steps)
|
|
46
43
|
|
|
47
44
|
def step(
|
|
48
45
|
self,
|
|
49
|
-
screenshot: Image | bytes,
|
|
46
|
+
screenshot: Image | URL | bytes,
|
|
50
47
|
instruction: str | None = None,
|
|
51
48
|
temperature: float | None = None,
|
|
52
49
|
) -> Step:
|
|
@@ -60,33 +57,13 @@ class Actor(BaseTask):
|
|
|
60
57
|
Returns:
|
|
61
58
|
Step: The actions and reasoning for this step
|
|
62
59
|
"""
|
|
63
|
-
self.
|
|
64
|
-
self._log_step_execution()
|
|
60
|
+
kwargs = self._prepare_step(screenshot, instruction, temperature)
|
|
65
61
|
|
|
66
62
|
try:
|
|
67
|
-
|
|
68
|
-
temp = self._get_temperature(temperature)
|
|
69
|
-
|
|
70
|
-
# Prepare screenshot kwargs (handles URLImage vs bytes/Image)
|
|
71
|
-
screenshot_kwargs = self._prepare_screenshot_kwargs(screenshot)
|
|
72
|
-
|
|
73
|
-
# Call API with dynamically determined screenshot argument
|
|
74
|
-
response = self.client.create_message(
|
|
75
|
-
model=self.model,
|
|
76
|
-
task_description=self.task_description,
|
|
77
|
-
task_id=self.task_id,
|
|
78
|
-
instruction=instruction,
|
|
79
|
-
messages_history=self.message_history,
|
|
80
|
-
temperature=temp,
|
|
81
|
-
**screenshot_kwargs,
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
# Convert API response to Step (also updates message_history)
|
|
63
|
+
response = self.client.create_message(**kwargs)
|
|
85
64
|
return self._build_step_response(response)
|
|
86
|
-
|
|
87
65
|
except Exception as e:
|
|
88
|
-
|
|
89
|
-
raise
|
|
66
|
+
self._handle_step_error(e)
|
|
90
67
|
|
|
91
68
|
def close(self):
|
|
92
69
|
"""Close the underlying HTTP client to free resources."""
|
|
@@ -110,7 +87,7 @@ class Task(Actor):
|
|
|
110
87
|
self,
|
|
111
88
|
api_key: str | None = None,
|
|
112
89
|
base_url: str | None = None,
|
|
113
|
-
model: str = "
|
|
90
|
+
model: str = "lux-actor-1",
|
|
114
91
|
temperature: float | None = None,
|
|
115
92
|
):
|
|
116
93
|
warnings.warn(
|
oagi/types/__init__.py
CHANGED
|
@@ -12,19 +12,39 @@ from .async_image_provider import AsyncImageProvider
|
|
|
12
12
|
from .image import Image
|
|
13
13
|
from .image_provider import ImageProvider
|
|
14
14
|
from .models import Action, ActionType, ImageConfig, Step
|
|
15
|
-
from .step_observer import
|
|
16
|
-
|
|
15
|
+
from .step_observer import (
|
|
16
|
+
ActionEvent,
|
|
17
|
+
AsyncObserver,
|
|
18
|
+
AsyncStepObserver,
|
|
19
|
+
BaseEvent,
|
|
20
|
+
ImageEvent,
|
|
21
|
+
LogEvent,
|
|
22
|
+
ObserverEvent,
|
|
23
|
+
PlanEvent,
|
|
24
|
+
SplitEvent,
|
|
25
|
+
StepEvent,
|
|
26
|
+
)
|
|
27
|
+
from .url import URL
|
|
17
28
|
|
|
18
29
|
__all__ = [
|
|
19
30
|
"Action",
|
|
31
|
+
"ActionEvent",
|
|
20
32
|
"ActionType",
|
|
33
|
+
"AsyncObserver",
|
|
34
|
+
"AsyncStepObserver",
|
|
35
|
+
"BaseEvent",
|
|
21
36
|
"Image",
|
|
22
37
|
"ImageConfig",
|
|
38
|
+
"ImageEvent",
|
|
39
|
+
"LogEvent",
|
|
40
|
+
"ObserverEvent",
|
|
41
|
+
"PlanEvent",
|
|
42
|
+
"SplitEvent",
|
|
23
43
|
"Step",
|
|
44
|
+
"StepEvent",
|
|
24
45
|
"ActionHandler",
|
|
25
46
|
"AsyncActionHandler",
|
|
26
47
|
"ImageProvider",
|
|
27
48
|
"AsyncImageProvider",
|
|
28
|
-
"
|
|
29
|
-
"URLImage",
|
|
49
|
+
"URL",
|
|
30
50
|
]
|
oagi/types/async_image_provider.py
CHANGED
|
@@ -9,10 +9,11 @@
|
|
|
9
9
|
from typing import Protocol
|
|
10
10
|
|
|
11
11
|
from .image import Image
|
|
12
|
+
from .url import URL
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class AsyncImageProvider(Protocol):
|
|
15
|
-
async def __call__(self) -> Image:
|
|
16
|
+
async def __call__(self) -> Image | URL:
|
|
16
17
|
"""
|
|
17
18
|
Asynchronously provides an image.
|
|
18
19
|
|
|
@@ -28,7 +29,7 @@ class AsyncImageProvider(Protocol):
|
|
|
28
29
|
RuntimeError: If an error occurs during image capture or generation.
|
|
29
30
|
"""
|
|
30
31
|
|
|
31
|
-
async def last_image(self) -> Image:
|
|
32
|
+
async def last_image(self) -> Image | URL:
|
|
32
33
|
"""
|
|
33
34
|
Asynchronously returns the last captured image.
|
|
34
35
|
|
oagi/types/image_provider.py
CHANGED
|
@@ -9,10 +9,11 @@
|
|
|
9
9
|
from typing import Protocol
|
|
10
10
|
|
|
11
11
|
from .image import Image
|
|
12
|
+
from .url import URL
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class ImageProvider(Protocol):
|
|
15
|
-
def __call__(self) -> Image:
|
|
16
|
+
def __call__(self) -> Image | URL:
|
|
16
17
|
"""
|
|
17
18
|
Represents the functionality to invoke the callable object and produce an Image
|
|
18
19
|
result. Typically used to process or generate images using the defined logic
|
|
@@ -22,7 +23,7 @@ class ImageProvider(Protocol):
|
|
|
22
23
|
Image: The resulting image output from the callable logic.
|
|
23
24
|
"""
|
|
24
25
|
|
|
25
|
-
def last_image(self) -> Image:
|
|
26
|
+
def last_image(self) -> Image | URL:
|
|
26
27
|
"""
|
|
27
28
|
Returns the last captured image.
|
|
28
29
|
|