oagi-core 0.11.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +1 -3
- oagi/actor/__init__.py +21 -0
- oagi/{task → actor}/async_.py +23 -7
- oagi/{task → actor}/async_short.py +1 -1
- oagi/actor/base.py +222 -0
- oagi/{task → actor}/short.py +1 -1
- oagi/{task → actor}/sync.py +21 -5
- oagi/agent/default.py +2 -1
- oagi/agent/observer/exporters.py +6 -0
- oagi/agent/observer/report_template.html +19 -0
- oagi/agent/tasker/planner.py +14 -12
- oagi/agent/tasker/taskee_agent.py +8 -4
- oagi/agent/tasker/tasker_agent.py +1 -1
- oagi/client/async_.py +54 -96
- oagi/client/base.py +81 -133
- oagi/client/sync.py +52 -99
- oagi/constants.py +7 -2
- oagi/handler/__init__.py +37 -22
- oagi/handler/utils.py +21 -0
- oagi/task/__init__.py +22 -8
- oagi/types/models/__init__.py +0 -2
- oagi/types/models/action.py +4 -1
- oagi/types/models/client.py +1 -17
- oagi/types/step_observer.py +2 -0
- oagi/utils/__init__.py +12 -0
- oagi/utils/output_parser.py +166 -0
- oagi/utils/prompt_builder.py +44 -0
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.1.dist-info}/METADATA +57 -10
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.1.dist-info}/RECORD +32 -27
- oagi/task/base.py +0 -158
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.1.dist-info}/WHEEL +0 -0
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.1.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.1.dist-info}/licenses/LICENSE +0 -0
oagi/client/sync.py
CHANGED
|
@@ -9,28 +9,24 @@
|
|
|
9
9
|
from functools import wraps
|
|
10
10
|
|
|
11
11
|
import httpx
|
|
12
|
-
from httpx import
|
|
12
|
+
from httpx import HTTPTransport
|
|
13
|
+
from openai import OpenAI
|
|
13
14
|
|
|
14
15
|
from ..constants import (
|
|
15
|
-
API_HEALTH_ENDPOINT,
|
|
16
16
|
API_V1_FILE_UPLOAD_ENDPOINT,
|
|
17
17
|
API_V1_GENERATE_ENDPOINT,
|
|
18
|
-
|
|
18
|
+
DEFAULT_MAX_RETRIES,
|
|
19
19
|
HTTP_CLIENT_TIMEOUT,
|
|
20
20
|
)
|
|
21
21
|
from ..logging import get_logger
|
|
22
22
|
from ..types import Image
|
|
23
|
-
from ..types.models import GenerateResponse,
|
|
23
|
+
from ..types.models import GenerateResponse, UploadFileResponse, Usage
|
|
24
|
+
from ..types.models.step import Step
|
|
24
25
|
from .base import BaseClient
|
|
25
26
|
|
|
26
27
|
logger = get_logger("sync_client")
|
|
27
28
|
|
|
28
29
|
|
|
29
|
-
def _log_trace_id(response: Response):
|
|
30
|
-
logger.error(f"Request Id: {response.headers.get('x-request-id', '')}")
|
|
31
|
-
logger.error(f"Trace Id: {response.headers.get('x-trace-id', '')}")
|
|
32
|
-
|
|
33
|
-
|
|
34
30
|
def log_trace_on_failure(func):
|
|
35
31
|
"""Decorator that logs trace ID when a method fails."""
|
|
36
32
|
|
|
@@ -41,7 +37,7 @@ def log_trace_on_failure(func):
|
|
|
41
37
|
except Exception as e:
|
|
42
38
|
# Try to get response from the exception if it has one
|
|
43
39
|
if (response := getattr(e, "response", None)) is not None:
|
|
44
|
-
_log_trace_id(response)
|
|
40
|
+
BaseClient._log_trace_id(response)
|
|
45
41
|
raise
|
|
46
42
|
|
|
47
43
|
return wrapper
|
|
@@ -50,113 +46,70 @@ def log_trace_on_failure(func):
|
|
|
50
46
|
class SyncClient(BaseClient[httpx.Client]):
|
|
51
47
|
"""Synchronous HTTP client for the OAGI API."""
|
|
52
48
|
|
|
53
|
-
def __init__(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
49
|
+
def __init__(
|
|
50
|
+
self,
|
|
51
|
+
base_url: str | None = None,
|
|
52
|
+
api_key: str | None = None,
|
|
53
|
+
max_retries: int = DEFAULT_MAX_RETRIES,
|
|
54
|
+
):
|
|
55
|
+
super().__init__(base_url, api_key, max_retries)
|
|
56
|
+
|
|
57
|
+
# OpenAI client for chat completions (with retries)
|
|
58
|
+
self.openai_client = OpenAI(
|
|
59
|
+
api_key=self.api_key,
|
|
60
|
+
base_url=f"{self.base_url}/v1",
|
|
61
|
+
max_retries=self.max_retries,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# httpx clients for S3 uploads and other endpoints (with retries)
|
|
65
|
+
transport = HTTPTransport(retries=self.max_retries)
|
|
66
|
+
self.http_client = httpx.Client(transport=transport, base_url=self.base_url)
|
|
67
|
+
self.upload_client = httpx.Client(
|
|
68
|
+
transport=transport, timeout=HTTP_CLIENT_TIMEOUT
|
|
69
|
+
)
|
|
70
|
+
|
|
57
71
|
logger.info(f"SyncClient initialized with base_url: {self.base_url}")
|
|
58
72
|
|
|
59
73
|
def __enter__(self):
|
|
60
74
|
return self
|
|
61
75
|
|
|
62
76
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
63
|
-
self.
|
|
64
|
-
self.upload_client.close()
|
|
77
|
+
self.close()
|
|
65
78
|
|
|
66
79
|
def close(self):
|
|
67
|
-
"""Close the underlying
|
|
68
|
-
self.
|
|
80
|
+
"""Close the underlying clients."""
|
|
81
|
+
self.openai_client.close()
|
|
82
|
+
self.http_client.close()
|
|
69
83
|
self.upload_client.close()
|
|
70
84
|
|
|
71
|
-
|
|
72
|
-
def create_message(
|
|
85
|
+
def chat_completion(
|
|
73
86
|
self,
|
|
74
87
|
model: str,
|
|
75
|
-
|
|
76
|
-
screenshot_url: str | None = None,
|
|
77
|
-
task_description: str | None = None,
|
|
78
|
-
task_id: str | None = None,
|
|
79
|
-
instruction: str | None = None,
|
|
80
|
-
messages_history: list | None = None,
|
|
88
|
+
messages: list,
|
|
81
89
|
temperature: float | None = None,
|
|
82
|
-
|
|
83
|
-
) ->
|
|
90
|
+
task_id: str | None = None,
|
|
91
|
+
) -> tuple[Step, str, Usage | None]:
|
|
84
92
|
"""
|
|
85
|
-
Call
|
|
93
|
+
Call OpenAI-compatible /v1/chat/completions endpoint.
|
|
86
94
|
|
|
87
95
|
Args:
|
|
88
|
-
model:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
task_id: Task ID for continuing existing task
|
|
93
|
-
instruction: Additional instruction when continuing a session
|
|
94
|
-
messages_history: OpenAI-compatible chat message history
|
|
95
|
-
temperature: Sampling temperature (0.0-2.0) for LLM inference
|
|
96
|
-
api_version: API version header
|
|
96
|
+
model: Model to use for inference
|
|
97
|
+
messages: Full message history (OpenAI-compatible format)
|
|
98
|
+
temperature: Sampling temperature (0.0-2.0)
|
|
99
|
+
task_id: Optional task ID for multi-turn conversations
|
|
97
100
|
|
|
98
101
|
Returns:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
httpx.HTTPStatusError: For HTTP error responses
|
|
102
|
+
Tuple of (Step, raw_output, Usage)
|
|
103
|
+
- Step: Parsed actions and reasoning
|
|
104
|
+
- raw_output: Raw model output string (for message history)
|
|
105
|
+
- Usage: Token usage statistics (or None if not available)
|
|
104
106
|
"""
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
"Exactly one of 'screenshot' or 'screenshot_url' must be provided"
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
self._log_request_info(model, task_description, task_id)
|
|
112
|
-
|
|
113
|
-
# Upload screenshot to S3 if bytes provided, otherwise use URL directly
|
|
114
|
-
upload_file_response = None
|
|
115
|
-
if screenshot is not None:
|
|
116
|
-
upload_file_response = self.put_s3_presigned_url(screenshot, api_version)
|
|
117
|
-
|
|
118
|
-
# Prepare message payload
|
|
119
|
-
headers, payload = self._prepare_message_payload(
|
|
120
|
-
model=model,
|
|
121
|
-
upload_file_response=upload_file_response,
|
|
122
|
-
task_description=task_description,
|
|
123
|
-
task_id=task_id,
|
|
124
|
-
instruction=instruction,
|
|
125
|
-
messages_history=messages_history,
|
|
126
|
-
temperature=temperature,
|
|
127
|
-
api_version=api_version,
|
|
128
|
-
screenshot_url=screenshot_url,
|
|
107
|
+
logger.info(f"Making chat completion request with model: {model}")
|
|
108
|
+
kwargs = self._build_chat_completion_kwargs(
|
|
109
|
+
model, messages, temperature, task_id
|
|
129
110
|
)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
try:
|
|
133
|
-
response = self.client.post(
|
|
134
|
-
API_V2_MESSAGE_ENDPOINT,
|
|
135
|
-
json=payload,
|
|
136
|
-
headers=headers,
|
|
137
|
-
timeout=self.timeout,
|
|
138
|
-
)
|
|
139
|
-
return self._process_response(response)
|
|
140
|
-
except (httpx.TimeoutException, httpx.NetworkError) as e:
|
|
141
|
-
self._handle_upload_http_errors(e)
|
|
142
|
-
|
|
143
|
-
def health_check(self) -> dict:
|
|
144
|
-
"""
|
|
145
|
-
Call the /health endpoint for health check
|
|
146
|
-
|
|
147
|
-
Returns:
|
|
148
|
-
dict: Health check response
|
|
149
|
-
"""
|
|
150
|
-
logger.debug("Making health check request")
|
|
151
|
-
try:
|
|
152
|
-
response = self.client.get(API_HEALTH_ENDPOINT)
|
|
153
|
-
response.raise_for_status()
|
|
154
|
-
result = response.json()
|
|
155
|
-
logger.debug("Health check successful")
|
|
156
|
-
return result
|
|
157
|
-
except httpx.HTTPStatusError as e:
|
|
158
|
-
logger.warning(f"Health check failed: {e}")
|
|
159
|
-
raise
|
|
111
|
+
response = self.openai_client.chat.completions.create(**kwargs)
|
|
112
|
+
return self._parse_chat_completion_response(response)
|
|
160
113
|
|
|
161
114
|
def get_s3_presigned_url(
|
|
162
115
|
self,
|
|
@@ -175,7 +128,7 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
175
128
|
|
|
176
129
|
try:
|
|
177
130
|
headers = self._build_headers(api_version)
|
|
178
|
-
response = self.
|
|
131
|
+
response = self.http_client.get(
|
|
179
132
|
API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
|
|
180
133
|
)
|
|
181
134
|
return self._process_upload_response(response)
|
|
@@ -295,7 +248,7 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
295
248
|
|
|
296
249
|
# Make request
|
|
297
250
|
try:
|
|
298
|
-
response = self.
|
|
251
|
+
response = self.http_client.post(
|
|
299
252
|
API_V1_GENERATE_ENDPOINT,
|
|
300
253
|
json=payload,
|
|
301
254
|
headers=headers,
|
oagi/constants.py
CHANGED
|
@@ -9,10 +9,8 @@
|
|
|
9
9
|
# URLs & API Endpoints
|
|
10
10
|
DEFAULT_BASE_URL = "https://api.agiopen.org"
|
|
11
11
|
API_KEY_HELP_URL = "https://developer.agiopen.org/api-keys"
|
|
12
|
-
API_V2_MESSAGE_ENDPOINT = "/v2/message"
|
|
13
12
|
API_V1_FILE_UPLOAD_ENDPOINT = "/v1/file/upload"
|
|
14
13
|
API_V1_GENERATE_ENDPOINT = "/v1/generate"
|
|
15
|
-
API_HEALTH_ENDPOINT = "/health"
|
|
16
14
|
|
|
17
15
|
# Model identifiers
|
|
18
16
|
MODEL_ACTOR = "lux-actor-1"
|
|
@@ -28,6 +26,10 @@ DEFAULT_MAX_STEPS = 20
|
|
|
28
26
|
DEFAULT_MAX_STEPS_THINKER = 100
|
|
29
27
|
DEFAULT_MAX_STEPS_TASKER = 60
|
|
30
28
|
|
|
29
|
+
# Maximum allowed steps per model (hard limits)
|
|
30
|
+
MAX_STEPS_ACTOR = 30
|
|
31
|
+
MAX_STEPS_THINKER = 120
|
|
32
|
+
|
|
31
33
|
# Reflection intervals
|
|
32
34
|
DEFAULT_REFLECTION_INTERVAL = 4
|
|
33
35
|
DEFAULT_REFLECTION_INTERVAL_TASKER = 20
|
|
@@ -41,3 +43,6 @@ DEFAULT_TEMPERATURE_LOW = 0.1
|
|
|
41
43
|
|
|
42
44
|
# Timeout Values
|
|
43
45
|
HTTP_CLIENT_TIMEOUT = 60
|
|
46
|
+
|
|
47
|
+
# Retry Configuration
|
|
48
|
+
DEFAULT_MAX_RETRIES = 2
|
oagi/handler/__init__.py
CHANGED
|
@@ -5,28 +5,43 @@
|
|
|
5
5
|
# This file is part of the official API project.
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
|
-
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
8
|
+
import importlib
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from .utils import reset_handler
|
|
12
|
+
|
|
13
|
+
# Lazy imports for pyautogui-dependent modules to avoid import errors on headless systems
|
|
14
|
+
_LAZY_IMPORTS: dict[str, str] = {
|
|
15
|
+
"AsyncPyautoguiActionHandler": "oagi.handler.async_pyautogui_action_handler",
|
|
16
|
+
"AsyncScreenshotMaker": "oagi.handler.async_screenshot_maker",
|
|
17
|
+
"PILImage": "oagi.handler.pil_image",
|
|
18
|
+
"PyautoguiActionHandler": "oagi.handler.pyautogui_action_handler",
|
|
19
|
+
"PyautoguiConfig": "oagi.handler.pyautogui_action_handler",
|
|
20
|
+
"ScreenshotMaker": "oagi.handler.screenshot_maker",
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from oagi.handler.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
|
|
25
|
+
from oagi.handler.async_screenshot_maker import AsyncScreenshotMaker
|
|
26
|
+
from oagi.handler.pil_image import PILImage
|
|
27
|
+
from oagi.handler.pyautogui_action_handler import (
|
|
28
|
+
PyautoguiActionHandler,
|
|
29
|
+
PyautoguiConfig,
|
|
30
|
+
)
|
|
31
|
+
from oagi.handler.screenshot_maker import ScreenshotMaker
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def __getattr__(name: str):
|
|
35
|
+
"""Lazy import for pyautogui-dependent modules."""
|
|
36
|
+
if name in _LAZY_IMPORTS:
|
|
37
|
+
module = importlib.import_module(_LAZY_IMPORTS[name])
|
|
38
|
+
return getattr(module, name)
|
|
39
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def __dir__() -> list[str]:
|
|
43
|
+
"""Return all public names including lazy imports."""
|
|
44
|
+
return sorted(set(__all__) | set(_LAZY_IMPORTS.keys()))
|
|
30
45
|
|
|
31
46
|
|
|
32
47
|
__all__ = [
|
oagi/handler/utils.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def reset_handler(handler) -> None:
|
|
11
|
+
"""Reset handler state if supported.
|
|
12
|
+
|
|
13
|
+
Uses duck-typing to check if the handler has a reset() method.
|
|
14
|
+
This allows handlers to reset their internal state (e.g., capslock state)
|
|
15
|
+
at the start of a new automation task.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
handler: The action handler to reset
|
|
19
|
+
"""
|
|
20
|
+
if hasattr(handler, "reset"):
|
|
21
|
+
handler.reset()
|
oagi/task/__init__.py
CHANGED
|
@@ -6,16 +6,30 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
"""Deprecated: Use oagi.actor instead. This module will be removed in a future version."""
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
|
|
13
|
+
from oagi.actor import (
|
|
14
|
+
Actor,
|
|
15
|
+
AsyncActor,
|
|
16
|
+
AsyncShortTask,
|
|
17
|
+
AsyncTask,
|
|
18
|
+
ShortTask,
|
|
19
|
+
Task,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
warnings.warn(
|
|
23
|
+
"oagi.task is deprecated, use oagi.actor instead",
|
|
24
|
+
DeprecationWarning,
|
|
25
|
+
stacklevel=2,
|
|
26
|
+
)
|
|
13
27
|
|
|
14
28
|
__all__ = [
|
|
15
29
|
"Actor",
|
|
16
30
|
"AsyncActor",
|
|
17
|
-
"Task",
|
|
18
|
-
"AsyncTask",
|
|
19
|
-
"ShortTask",
|
|
20
|
-
"AsyncShortTask",
|
|
31
|
+
"Task",
|
|
32
|
+
"AsyncTask",
|
|
33
|
+
"ShortTask",
|
|
34
|
+
"AsyncShortTask",
|
|
21
35
|
]
|
oagi/types/models/__init__.py
CHANGED
|
@@ -17,7 +17,6 @@ from .client import (
|
|
|
17
17
|
ErrorDetail,
|
|
18
18
|
ErrorResponse,
|
|
19
19
|
GenerateResponse,
|
|
20
|
-
LLMResponse,
|
|
21
20
|
UploadFileResponse,
|
|
22
21
|
Usage,
|
|
23
22
|
)
|
|
@@ -31,7 +30,6 @@ __all__ = [
|
|
|
31
30
|
"ErrorResponse",
|
|
32
31
|
"GenerateResponse",
|
|
33
32
|
"ImageConfig",
|
|
34
|
-
"LLMResponse",
|
|
35
33
|
"Step",
|
|
36
34
|
"UploadFileResponse",
|
|
37
35
|
"Usage",
|
oagi/types/models/action.py
CHANGED
|
@@ -81,4 +81,7 @@ def parse_scroll(args_str: str) -> tuple[int, int, str] | None:
|
|
|
81
81
|
match = re.match(r"(\d+),\s*(\d+),\s*(\w+)", args_str)
|
|
82
82
|
if not match:
|
|
83
83
|
return None
|
|
84
|
-
|
|
84
|
+
direction = match.group(3).lower()
|
|
85
|
+
if direction not in ("up", "down"):
|
|
86
|
+
return None
|
|
87
|
+
return int(match.group(1)), int(match.group(2)), direction
|
oagi/types/models/client.py
CHANGED
|
@@ -8,8 +8,6 @@
|
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
|
-
from .action import Action
|
|
12
|
-
|
|
13
11
|
|
|
14
12
|
class Usage(BaseModel):
|
|
15
13
|
prompt_tokens: int
|
|
@@ -30,21 +28,6 @@ class ErrorResponse(BaseModel):
|
|
|
30
28
|
error: ErrorDetail | None
|
|
31
29
|
|
|
32
30
|
|
|
33
|
-
class LLMResponse(BaseModel):
|
|
34
|
-
id: str
|
|
35
|
-
task_id: str
|
|
36
|
-
object: str = "task.completion"
|
|
37
|
-
created: int
|
|
38
|
-
model: str
|
|
39
|
-
task_description: str
|
|
40
|
-
is_complete: bool
|
|
41
|
-
actions: list[Action]
|
|
42
|
-
reason: str | None = None
|
|
43
|
-
usage: Usage
|
|
44
|
-
error: ErrorDetail | None = None
|
|
45
|
-
raw_output: str | None = None
|
|
46
|
-
|
|
47
|
-
|
|
48
31
|
class UploadFileResponse(BaseModel):
|
|
49
32
|
"""Response from S3 presigned URL upload."""
|
|
50
33
|
|
|
@@ -66,3 +49,4 @@ class GenerateResponse(BaseModel):
|
|
|
66
49
|
deprecated=True,
|
|
67
50
|
description="This field is deprecated",
|
|
68
51
|
)
|
|
52
|
+
request_id: str | None = None
|
oagi/types/step_observer.py
CHANGED
|
@@ -35,6 +35,7 @@ class StepEvent(BaseEvent):
|
|
|
35
35
|
step_num: int
|
|
36
36
|
image: bytes | str
|
|
37
37
|
step: Step
|
|
38
|
+
task_id: str | None = None
|
|
38
39
|
|
|
39
40
|
|
|
40
41
|
class ActionEvent(BaseEvent):
|
|
@@ -68,6 +69,7 @@ class PlanEvent(BaseEvent):
|
|
|
68
69
|
image: bytes | str | None = None
|
|
69
70
|
reasoning: str
|
|
70
71
|
result: str | None = None
|
|
72
|
+
request_id: str | None = None
|
|
71
73
|
|
|
72
74
|
|
|
73
75
|
ObserverEvent = ImageEvent | StepEvent | ActionEvent | LogEvent | SplitEvent | PlanEvent
|
oagi/utils/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
from .output_parser import parse_raw_output
|
|
10
|
+
from .prompt_builder import build_prompt
|
|
11
|
+
|
|
12
|
+
__all__ = ["build_prompt", "parse_raw_output"]
|
oagi/utils/output_parser.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
from ..types.models.action import Action, ActionType
|
|
12
|
+
from ..types.models.step import Step
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def parse_raw_output(raw_output: str) -> Step:
|
|
16
|
+
"""Parse raw LLM output into structured Step format.
|
|
17
|
+
|
|
18
|
+
Expected format:
|
|
19
|
+
<|think_start|> reasoning text <|think_end|>
|
|
20
|
+
<|action_start|> action1(args) & action2(args) & ... <|action_end|>
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
raw_output: Raw text output from the LLM
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
Step object with parsed reasoning and actions
|
|
27
|
+
"""
|
|
28
|
+
# Extract reasoning/thinking
|
|
29
|
+
think_pattern = r"<\|think_start\|>(.*?)<\|think_end\|>"
|
|
30
|
+
think_match = re.search(think_pattern, raw_output, re.DOTALL)
|
|
31
|
+
reason = think_match.group(1).strip() if think_match else ""
|
|
32
|
+
|
|
33
|
+
# Extract action block
|
|
34
|
+
action_pattern = r"<\|action_start\|>(.*?)<\|action_end\|>"
|
|
35
|
+
action_match = re.search(action_pattern, raw_output, re.DOTALL)
|
|
36
|
+
|
|
37
|
+
actions: list[Action] = []
|
|
38
|
+
stop = False
|
|
39
|
+
|
|
40
|
+
if action_match:
|
|
41
|
+
action_block = action_match.group(1).strip()
|
|
42
|
+
action_texts = _split_actions(action_block)
|
|
43
|
+
|
|
44
|
+
for action_text in action_texts:
|
|
45
|
+
parsed_action = _parse_action(action_text.strip())
|
|
46
|
+
if parsed_action:
|
|
47
|
+
actions.append(parsed_action)
|
|
48
|
+
if parsed_action.type == ActionType.FINISH:
|
|
49
|
+
stop = True
|
|
50
|
+
|
|
51
|
+
return Step(reason=reason, actions=actions, stop=stop)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _split_actions(action_block: str) -> list[str]:
|
|
55
|
+
"""Split action block by & separator, but only when & is outside parentheses.
|
|
56
|
+
|
|
57
|
+
Note: This parser does NOT handle '&' inside quoted strings.
|
|
58
|
+
E.g., type("a&b") would incorrectly split. The LLM should avoid
|
|
59
|
+
this pattern by using alternative escape sequences.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
action_block: String containing one or more actions separated by &
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
List of individual action strings
|
|
66
|
+
"""
|
|
67
|
+
actions: list[str] = []
|
|
68
|
+
current_action: list[str] = []
|
|
69
|
+
paren_level = 0
|
|
70
|
+
|
|
71
|
+
for char in action_block:
|
|
72
|
+
if char == "(":
|
|
73
|
+
paren_level += 1
|
|
74
|
+
current_action.append(char)
|
|
75
|
+
elif char == ")":
|
|
76
|
+
paren_level -= 1
|
|
77
|
+
current_action.append(char)
|
|
78
|
+
elif char == "&" and paren_level == 0:
|
|
79
|
+
action_str = "".join(current_action).strip()
|
|
80
|
+
if action_str:
|
|
81
|
+
actions.append(action_str)
|
|
82
|
+
current_action = []
|
|
83
|
+
else:
|
|
84
|
+
current_action.append(char)
|
|
85
|
+
|
|
86
|
+
# Add the last action
|
|
87
|
+
action_str = "".join(current_action).strip()
|
|
88
|
+
if action_str:
|
|
89
|
+
actions.append(action_str)
|
|
90
|
+
|
|
91
|
+
return actions
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _parse_action(action_text: str) -> Action | None:
|
|
95
|
+
"""Parse individual action text into Action object.
|
|
96
|
+
|
|
97
|
+
Expected formats:
|
|
98
|
+
- click(x, y) # left-click at position
|
|
99
|
+
- left_double(x, y) # left-double-click at position
|
|
100
|
+
- left_triple(x, y) # left-triple-click at position
|
|
101
|
+
- right_single(x, y) # right-click at position
|
|
102
|
+
- drag(x1, y1, x2, y2) # drag from (x1, y1) to (x2, y2)
|
|
103
|
+
- hotkey(key, c) # press key c times
|
|
104
|
+
- type(text) # type text string
|
|
105
|
+
- scroll(x, y, direction, c) # scroll at position
|
|
106
|
+
- wait() # wait for a while
|
|
107
|
+
- finish() # indicate task is finished
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
action_text: String representation of a single action
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
Action object or None if parsing fails
|
|
114
|
+
"""
|
|
115
|
+
# Match action format: action_type(arguments)
|
|
116
|
+
match = re.match(r"(\w+)\((.*)\)", action_text.strip())
|
|
117
|
+
if not match:
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
action_type = match.group(1).lower()
|
|
121
|
+
arguments = match.group(2).strip()
|
|
122
|
+
|
|
123
|
+
# Parse count from arguments for actions that support it
|
|
124
|
+
count = 1
|
|
125
|
+
|
|
126
|
+
# Validate and map action type to enum
|
|
127
|
+
try:
|
|
128
|
+
action_enum = ActionType(action_type)
|
|
129
|
+
except ValueError:
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
# Parse specific action types and extract count where applicable
|
|
133
|
+
match action_enum:
|
|
134
|
+
case ActionType.HOTKEY:
|
|
135
|
+
# hotkey(key, c) - press key c times
|
|
136
|
+
args = arguments.rsplit(",", 1)
|
|
137
|
+
if len(args) >= 2 and args[1].strip():
|
|
138
|
+
key = args[0].strip()
|
|
139
|
+
try:
|
|
140
|
+
count = int(args[1].strip())
|
|
141
|
+
except ValueError:
|
|
142
|
+
count = 1
|
|
143
|
+
else:
|
|
144
|
+
key = arguments.strip()
|
|
145
|
+
count = 1
|
|
146
|
+
arguments = key
|
|
147
|
+
|
|
148
|
+
case ActionType.SCROLL:
|
|
149
|
+
# scroll(x, y, direction, c) - scroll at position
|
|
150
|
+
args = arguments.split(",")
|
|
151
|
+
if len(args) >= 4:
|
|
152
|
+
x = args[0].strip()
|
|
153
|
+
y = args[1].strip()
|
|
154
|
+
direction = args[2].strip()
|
|
155
|
+
try:
|
|
156
|
+
count = int(args[3].strip())
|
|
157
|
+
except (ValueError, IndexError):
|
|
158
|
+
count = 1
|
|
159
|
+
# Reconstruct arguments without count
|
|
160
|
+
arguments = f"{x},{y},{direction}"
|
|
161
|
+
|
|
162
|
+
case _:
|
|
163
|
+
# For other actions, use default count of 1
|
|
164
|
+
pass
|
|
165
|
+
|
|
166
|
+
return Action(type=action_enum, argument=arguments, count=count)
|
oagi/utils/prompt_builder.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
instruction_template = """You are a Desktop Agent completing computer use tasks from a user instruction.
|
|
10
|
+
|
|
11
|
+
Every step, you will look at the screenshot and output the desired actions in a format as:
|
|
12
|
+
|
|
13
|
+
<|think_start|> brief description of your intent and reasoning <|think_end|>
|
|
14
|
+
<|action_start|> one of the allowed actions as below <|action_end|>
|
|
15
|
+
|
|
16
|
+
In the action field, you have the following action formats:
|
|
17
|
+
1. click(x, y) # left-click at the position (x, y), where x and y are integers normalized between 0 and 1000
|
|
18
|
+
2. left_double(x, y) # left-double-click at the position (x, y), where x and y are integers normalized between 0 and 1000
|
|
19
|
+
3. left_triple(x, y) # left-triple-click at the position (x, y), where x and y are integers normalized between 0 and 1000
|
|
20
|
+
4. right_single(x, y) # right-click at the position (x, y), where x and y are integers normalized between 0 and 1000
|
|
21
|
+
5. drag(x1, y1, x2, y2) # drag the mouse from (x1, y1) to (x2, y2) to select or move contents, where x1, y1, x2, y2 are integers normalized between 0 and 1000
|
|
22
|
+
6. hotkey(key, c) # press the key for c times
|
|
23
|
+
7. type(text) # type a text string on the keyboard
|
|
24
|
+
8. scroll(x, y, direction, c) # scroll the mouse at position (x, y) in the direction of up or down for c times, where x and y are integers normalized between 0 and 1000
|
|
25
|
+
9. wait() # wait for a while
|
|
26
|
+
10. finish() # indicate the task is finished
|
|
27
|
+
|
|
28
|
+
Directly output the text beginning with <|think_start|>, no additional text is needed for this scenario.
|
|
29
|
+
|
|
30
|
+
The user instruction is:
|
|
31
|
+
{instruction}
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def build_prompt(task_description: str) -> str:
|
|
36
|
+
"""Build the instruction prompt for the OAGI model.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
task_description: The task description to include in the prompt
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
The formatted prompt string with action format documentation
|
|
43
|
+
"""
|
|
44
|
+
return instruction_template.format(instruction=task_description)
|