oagi-core 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/agent/default.py +14 -4
- oagi/agent/factories.py +98 -16
- oagi/agent/tasker/planner.py +19 -8
- oagi/agent/tasker/taskee_agent.py +31 -9
- oagi/agent/tasker/tasker_agent.py +16 -5
- oagi/cli/agent.py +70 -31
- oagi/cli/display.py +2 -1
- oagi/cli/server.py +1 -1
- oagi/cli/utils.py +4 -3
- oagi/client/async_.py +19 -6
- oagi/client/base.py +14 -16
- oagi/client/sync.py +19 -6
- oagi/constants.py +43 -0
- oagi/handler/__init__.py +16 -0
- oagi/handler/_macos.py +137 -0
- oagi/handler/_windows.py +101 -0
- oagi/handler/async_pyautogui_action_handler.py +8 -0
- oagi/handler/capslock_manager.py +55 -0
- oagi/handler/pyautogui_action_handler.py +23 -40
- oagi/server/config.py +6 -3
- oagi/server/models.py +5 -3
- oagi/server/session_store.py +8 -6
- oagi/server/socketio_server.py +6 -5
- oagi/task/async_.py +4 -3
- oagi/task/async_short.py +3 -2
- oagi/task/base.py +2 -1
- oagi/task/short.py +3 -2
- oagi/task/sync.py +4 -3
- oagi/types/__init__.py +2 -1
- oagi/types/url.py +25 -0
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/METADATA +34 -1
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/RECORD +35 -32
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.10.2.dist-info → oagi_core-0.11.0.dist-info}/licenses/LICENSE +0 -0
oagi/agent/default.py
CHANGED
|
@@ -10,6 +10,13 @@ import asyncio
|
|
|
10
10
|
import logging
|
|
11
11
|
|
|
12
12
|
from .. import AsyncActor
|
|
13
|
+
from ..constants import (
|
|
14
|
+
DEFAULT_MAX_STEPS,
|
|
15
|
+
DEFAULT_STEP_DELAY,
|
|
16
|
+
DEFAULT_TEMPERATURE,
|
|
17
|
+
MODEL_ACTOR,
|
|
18
|
+
)
|
|
19
|
+
from ..handler import reset_handler
|
|
13
20
|
from ..types import (
|
|
14
21
|
ActionEvent,
|
|
15
22
|
AsyncActionHandler,
|
|
@@ -36,11 +43,11 @@ class AsyncDefaultAgent:
|
|
|
36
43
|
self,
|
|
37
44
|
api_key: str | None = None,
|
|
38
45
|
base_url: str | None = None,
|
|
39
|
-
model: str =
|
|
40
|
-
max_steps: int =
|
|
41
|
-
temperature: float | None =
|
|
46
|
+
model: str = MODEL_ACTOR,
|
|
47
|
+
max_steps: int = DEFAULT_MAX_STEPS,
|
|
48
|
+
temperature: float | None = DEFAULT_TEMPERATURE,
|
|
42
49
|
step_observer: AsyncObserver | None = None,
|
|
43
|
-
step_delay: float =
|
|
50
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
44
51
|
):
|
|
45
52
|
self.api_key = api_key
|
|
46
53
|
self.base_url = base_url
|
|
@@ -62,6 +69,9 @@ class AsyncDefaultAgent:
|
|
|
62
69
|
logger.info(f"Starting async task execution: {instruction}")
|
|
63
70
|
await self.actor.init_task(instruction, max_steps=self.max_steps)
|
|
64
71
|
|
|
72
|
+
# Reset handler state at automation start
|
|
73
|
+
reset_handler(action_handler)
|
|
74
|
+
|
|
65
75
|
for i in range(self.max_steps):
|
|
66
76
|
step_num = i + 1
|
|
67
77
|
logger.debug(f"Executing step {step_num}/{self.max_steps}")
|
oagi/agent/factories.py
CHANGED
|
@@ -6,6 +6,16 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
from oagi.agent.tasker import TaskerAgent
|
|
9
|
+
from oagi.constants import (
|
|
10
|
+
DEFAULT_MAX_STEPS,
|
|
11
|
+
DEFAULT_MAX_STEPS_TASKER,
|
|
12
|
+
DEFAULT_MAX_STEPS_THINKER,
|
|
13
|
+
DEFAULT_REFLECTION_INTERVAL_TASKER,
|
|
14
|
+
DEFAULT_STEP_DELAY,
|
|
15
|
+
DEFAULT_TEMPERATURE_LOW,
|
|
16
|
+
MODEL_ACTOR,
|
|
17
|
+
MODEL_THINKER,
|
|
18
|
+
)
|
|
9
19
|
from oagi.types import AsyncStepObserver
|
|
10
20
|
|
|
11
21
|
from .default import AsyncDefaultAgent
|
|
@@ -17,11 +27,11 @@ from .registry import async_agent_register
|
|
|
17
27
|
def create_default_agent(
|
|
18
28
|
api_key: str | None = None,
|
|
19
29
|
base_url: str | None = None,
|
|
20
|
-
model: str =
|
|
21
|
-
max_steps: int =
|
|
22
|
-
temperature: float =
|
|
30
|
+
model: str = MODEL_ACTOR,
|
|
31
|
+
max_steps: int = DEFAULT_MAX_STEPS,
|
|
32
|
+
temperature: float = DEFAULT_TEMPERATURE_LOW,
|
|
23
33
|
step_observer: AsyncStepObserver | None = None,
|
|
24
|
-
step_delay: float =
|
|
34
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
25
35
|
) -> AsyncAgent:
|
|
26
36
|
return AsyncDefaultAgent(
|
|
27
37
|
api_key=api_key,
|
|
@@ -38,11 +48,11 @@ def create_default_agent(
|
|
|
38
48
|
def create_thinker_agent(
|
|
39
49
|
api_key: str | None = None,
|
|
40
50
|
base_url: str | None = None,
|
|
41
|
-
model: str =
|
|
42
|
-
max_steps: int =
|
|
43
|
-
temperature: float =
|
|
51
|
+
model: str = MODEL_THINKER,
|
|
52
|
+
max_steps: int = DEFAULT_MAX_STEPS_THINKER,
|
|
53
|
+
temperature: float = DEFAULT_TEMPERATURE_LOW,
|
|
44
54
|
step_observer: AsyncStepObserver | None = None,
|
|
45
|
-
step_delay: float =
|
|
55
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
46
56
|
) -> AsyncAgent:
|
|
47
57
|
return AsyncDefaultAgent(
|
|
48
58
|
api_key=api_key,
|
|
@@ -55,16 +65,22 @@ def create_thinker_agent(
|
|
|
55
65
|
)
|
|
56
66
|
|
|
57
67
|
|
|
58
|
-
@async_agent_register(mode="tasker")
|
|
59
|
-
def
|
|
68
|
+
@async_agent_register(mode="tasker:cvs_appointment")
|
|
69
|
+
def create_cvs_appointment_agent(
|
|
60
70
|
api_key: str | None = None,
|
|
61
71
|
base_url: str | None = None,
|
|
62
|
-
model: str =
|
|
63
|
-
max_steps: int =
|
|
64
|
-
temperature: float =
|
|
65
|
-
reflection_interval: int =
|
|
72
|
+
model: str = MODEL_ACTOR,
|
|
73
|
+
max_steps: int = DEFAULT_MAX_STEPS_TASKER,
|
|
74
|
+
temperature: float = DEFAULT_TEMPERATURE_LOW,
|
|
75
|
+
reflection_interval: int = DEFAULT_REFLECTION_INTERVAL_TASKER,
|
|
66
76
|
step_observer: AsyncStepObserver | None = None,
|
|
67
|
-
step_delay: float =
|
|
77
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
78
|
+
# CVS-specific parameters
|
|
79
|
+
first_name: str = "First",
|
|
80
|
+
last_name: str = "Last",
|
|
81
|
+
email: str = "user@example.com",
|
|
82
|
+
birthday: str = "01-01-1990", # MM-DD-YYYY
|
|
83
|
+
zip_code: str = "00000",
|
|
68
84
|
) -> AsyncAgent:
|
|
69
85
|
tasker = TaskerAgent(
|
|
70
86
|
api_key=api_key,
|
|
@@ -76,5 +92,71 @@ def create_planner_agent(
|
|
|
76
92
|
step_observer=step_observer,
|
|
77
93
|
step_delay=step_delay,
|
|
78
94
|
)
|
|
79
|
-
|
|
95
|
+
|
|
96
|
+
month, day, year = birthday.split("-")
|
|
97
|
+
instruction = (
|
|
98
|
+
f"Schedule an appointment at CVS for {first_name} {last_name} "
|
|
99
|
+
f"with email {email} and birthday {birthday}"
|
|
100
|
+
)
|
|
101
|
+
todos = [
|
|
102
|
+
"Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, "
|
|
103
|
+
"wait for the page to load, then click on the button of Schedule vaccinations on the "
|
|
104
|
+
"top of the page",
|
|
105
|
+
f"Enter the first name '{first_name}', last name '{last_name}', and email '{email}' "
|
|
106
|
+
"in the form. Do not use any suggested autofills. Make sure the mobile phone number "
|
|
107
|
+
"is empty.",
|
|
108
|
+
f"Slightly scroll down to see the date of birth, enter Month '{month}', Day '{day}', "
|
|
109
|
+
f"and Year '{year}' in the form",
|
|
110
|
+
"Click on 'Continue as guest' button, wait for the page to load with wait, "
|
|
111
|
+
"click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
|
|
112
|
+
f"Click on 'next' to enter the page with recommendation vaccines, then click on "
|
|
113
|
+
f"'next' again, until on the page of entering zip code, enter '{zip_code}', select "
|
|
114
|
+
"the first option from the dropdown menu, and click on 'Search'",
|
|
115
|
+
]
|
|
116
|
+
|
|
117
|
+
tasker.set_task(instruction, todos)
|
|
118
|
+
return tasker
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@async_agent_register(mode="tasker:software_qa")
|
|
122
|
+
def create_software_qa_agent(
|
|
123
|
+
api_key: str | None = None,
|
|
124
|
+
base_url: str | None = None,
|
|
125
|
+
model: str = MODEL_ACTOR,
|
|
126
|
+
max_steps: int = DEFAULT_MAX_STEPS_TASKER,
|
|
127
|
+
temperature: float = DEFAULT_TEMPERATURE_LOW,
|
|
128
|
+
reflection_interval: int = DEFAULT_REFLECTION_INTERVAL_TASKER,
|
|
129
|
+
step_observer: AsyncStepObserver | None = None,
|
|
130
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
131
|
+
) -> AsyncAgent:
|
|
132
|
+
tasker = TaskerAgent(
|
|
133
|
+
api_key=api_key,
|
|
134
|
+
base_url=base_url,
|
|
135
|
+
model=model,
|
|
136
|
+
max_steps=max_steps,
|
|
137
|
+
temperature=temperature,
|
|
138
|
+
reflection_interval=reflection_interval,
|
|
139
|
+
step_observer=step_observer,
|
|
140
|
+
step_delay=step_delay,
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
instruction = "QA: click through every sidebar button in the Nuclear Player UI"
|
|
144
|
+
todos = [
|
|
145
|
+
"Click on 'Dashboard' in the left sidebar",
|
|
146
|
+
"Click on 'Downloads' in the left sidebar",
|
|
147
|
+
"Click on 'Lyrics' in the left sidebar",
|
|
148
|
+
"Click on 'Plugins' in the left sidebar",
|
|
149
|
+
"Click on 'Search Results' in the left sidebar",
|
|
150
|
+
"Click on 'Settings' in the left sidebar",
|
|
151
|
+
"Click on 'Equalizer' in the left sidebar",
|
|
152
|
+
"Click on 'Visualizer' in the left sidebar",
|
|
153
|
+
"Click on 'Listening History' in the left sidebar",
|
|
154
|
+
"Click on 'Favorite Albums' in the left sidebar",
|
|
155
|
+
"Click on 'Favorite Tracks' in the left sidebar",
|
|
156
|
+
"Click on 'Favorite Artists' in the left sidebar",
|
|
157
|
+
"Click on 'Local Library' in the left sidebar",
|
|
158
|
+
"Click on 'Playlists' in the left sidebar",
|
|
159
|
+
]
|
|
160
|
+
|
|
161
|
+
tasker.set_task(instruction, todos)
|
|
80
162
|
return tasker
|
oagi/agent/tasker/planner.py
CHANGED
|
@@ -10,7 +10,8 @@ import json
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
12
|
from ...client import AsyncClient
|
|
13
|
-
from ...
|
|
13
|
+
from ...constants import DEFAULT_REFLECTION_INTERVAL
|
|
14
|
+
from ...types import URL, Image, extract_uuid_from_url
|
|
14
15
|
from .memory import PlannerMemory
|
|
15
16
|
from .models import Action, PlannerOutput, ReflectionOutput
|
|
16
17
|
|
|
@@ -137,11 +138,16 @@ class Planner:
|
|
|
137
138
|
# Ensure we have a client
|
|
138
139
|
client = self._ensure_client()
|
|
139
140
|
|
|
140
|
-
#
|
|
141
|
+
# Get screenshot UUID - either extract from URL or upload
|
|
141
142
|
screenshot_uuid = None
|
|
142
143
|
if screenshot:
|
|
143
|
-
|
|
144
|
-
|
|
144
|
+
# Check if screenshot is already a URL (already uploaded to S3)
|
|
145
|
+
if isinstance(screenshot, str):
|
|
146
|
+
screenshot_uuid = extract_uuid_from_url(screenshot)
|
|
147
|
+
# If not a URL or UUID extraction failed, upload the image
|
|
148
|
+
if not screenshot_uuid:
|
|
149
|
+
upload_response = await client.put_s3_presigned_url(screenshot)
|
|
150
|
+
screenshot_uuid = upload_response.uuid
|
|
145
151
|
|
|
146
152
|
# Extract memory data if provided
|
|
147
153
|
(
|
|
@@ -175,7 +181,7 @@ class Planner:
|
|
|
175
181
|
memory: PlannerMemory | None = None,
|
|
176
182
|
todo_index: int | None = None,
|
|
177
183
|
current_instruction: str | None = None,
|
|
178
|
-
reflection_interval: int =
|
|
184
|
+
reflection_interval: int = DEFAULT_REFLECTION_INTERVAL,
|
|
179
185
|
) -> ReflectionOutput:
|
|
180
186
|
"""Reflect on recent actions and progress.
|
|
181
187
|
|
|
@@ -194,11 +200,16 @@ class Planner:
|
|
|
194
200
|
# Ensure we have a client
|
|
195
201
|
client = self._ensure_client()
|
|
196
202
|
|
|
197
|
-
#
|
|
203
|
+
# Get screenshot UUID - either extract from URL or upload
|
|
198
204
|
result_screenshot_uuid = None
|
|
199
205
|
if screenshot:
|
|
200
|
-
|
|
201
|
-
|
|
206
|
+
# Check if screenshot is already a URL (already uploaded to S3)
|
|
207
|
+
if isinstance(screenshot, str):
|
|
208
|
+
result_screenshot_uuid = extract_uuid_from_url(screenshot)
|
|
209
|
+
# If not a URL or UUID extraction failed, upload the image
|
|
210
|
+
if not result_screenshot_uuid:
|
|
211
|
+
upload_response = await client.put_s3_presigned_url(screenshot)
|
|
212
|
+
result_screenshot_uuid = upload_response.uuid
|
|
202
213
|
|
|
203
214
|
# Extract memory data if provided
|
|
204
215
|
(
|
|
@@ -12,6 +12,14 @@ from datetime import datetime
|
|
|
12
12
|
from typing import Any
|
|
13
13
|
|
|
14
14
|
from oagi import AsyncActor
|
|
15
|
+
from oagi.constants import (
|
|
16
|
+
DEFAULT_MAX_STEPS,
|
|
17
|
+
DEFAULT_REFLECTION_INTERVAL,
|
|
18
|
+
DEFAULT_STEP_DELAY,
|
|
19
|
+
DEFAULT_TEMPERATURE,
|
|
20
|
+
MODEL_ACTOR,
|
|
21
|
+
)
|
|
22
|
+
from oagi.handler import reset_handler
|
|
15
23
|
from oagi.types import (
|
|
16
24
|
URL,
|
|
17
25
|
ActionEvent,
|
|
@@ -21,6 +29,7 @@ from oagi.types import (
|
|
|
21
29
|
Image,
|
|
22
30
|
PlanEvent,
|
|
23
31
|
StepEvent,
|
|
32
|
+
extract_uuid_from_url,
|
|
24
33
|
)
|
|
25
34
|
|
|
26
35
|
from ..protocol import AsyncAgent
|
|
@@ -52,15 +61,15 @@ class TaskeeAgent(AsyncAgent):
|
|
|
52
61
|
self,
|
|
53
62
|
api_key: str | None = None,
|
|
54
63
|
base_url: str | None = None,
|
|
55
|
-
model: str =
|
|
56
|
-
max_steps: int =
|
|
57
|
-
reflection_interval: int =
|
|
58
|
-
temperature: float =
|
|
64
|
+
model: str = MODEL_ACTOR,
|
|
65
|
+
max_steps: int = DEFAULT_MAX_STEPS,
|
|
66
|
+
reflection_interval: int = DEFAULT_REFLECTION_INTERVAL,
|
|
67
|
+
temperature: float = DEFAULT_TEMPERATURE,
|
|
59
68
|
planner: Planner | None = None,
|
|
60
69
|
external_memory: PlannerMemory | None = None,
|
|
61
70
|
todo_index: int | None = None,
|
|
62
71
|
step_observer: AsyncObserver | None = None,
|
|
63
|
-
step_delay: float =
|
|
72
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
64
73
|
):
|
|
65
74
|
"""Initialize the taskee agent.
|
|
66
75
|
|
|
@@ -114,6 +123,9 @@ class TaskeeAgent(AsyncAgent):
|
|
|
114
123
|
Returns:
|
|
115
124
|
True if successful, False otherwise
|
|
116
125
|
"""
|
|
126
|
+
# Reset handler state at todo execution start
|
|
127
|
+
reset_handler(action_handler)
|
|
128
|
+
|
|
117
129
|
self.current_todo = instruction
|
|
118
130
|
self.actions = []
|
|
119
131
|
self.total_actions = 0
|
|
@@ -249,11 +261,21 @@ class TaskeeAgent(AsyncAgent):
|
|
|
249
261
|
# Capture screenshot
|
|
250
262
|
screenshot = await image_provider()
|
|
251
263
|
|
|
252
|
-
#
|
|
264
|
+
# Get screenshot UUID - either extract from URL or upload
|
|
253
265
|
try:
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
266
|
+
screenshot_uuid = None
|
|
267
|
+
screenshot_url = None
|
|
268
|
+
|
|
269
|
+
# Check if screenshot is already a URL (from SocketIOImageProvider)
|
|
270
|
+
if isinstance(screenshot, str):
|
|
271
|
+
screenshot_uuid = extract_uuid_from_url(screenshot)
|
|
272
|
+
screenshot_url = screenshot
|
|
273
|
+
|
|
274
|
+
# If not a URL or UUID extraction failed, upload the image
|
|
275
|
+
if not screenshot_uuid:
|
|
276
|
+
upload_response = await client.put_s3_presigned_url(screenshot)
|
|
277
|
+
screenshot_uuid = upload_response.uuid
|
|
278
|
+
screenshot_url = upload_response.download_url
|
|
257
279
|
except Exception as e:
|
|
258
280
|
logger.error(f"Error uploading screenshot: {e}")
|
|
259
281
|
self._record_action(
|
|
@@ -9,6 +9,14 @@
|
|
|
9
9
|
import logging
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
+
from oagi.constants import (
|
|
13
|
+
DEFAULT_MAX_STEPS_TASKER,
|
|
14
|
+
DEFAULT_REFLECTION_INTERVAL,
|
|
15
|
+
DEFAULT_STEP_DELAY,
|
|
16
|
+
DEFAULT_TEMPERATURE,
|
|
17
|
+
MODEL_ACTOR,
|
|
18
|
+
)
|
|
19
|
+
from oagi.handler import reset_handler
|
|
12
20
|
from oagi.types import AsyncActionHandler, AsyncImageProvider, AsyncObserver, SplitEvent
|
|
13
21
|
|
|
14
22
|
from ..protocol import AsyncAgent
|
|
@@ -34,13 +42,13 @@ class TaskerAgent(AsyncAgent):
|
|
|
34
42
|
self,
|
|
35
43
|
api_key: str | None = None,
|
|
36
44
|
base_url: str | None = None,
|
|
37
|
-
model: str =
|
|
38
|
-
max_steps: int =
|
|
39
|
-
temperature: float =
|
|
40
|
-
reflection_interval: int =
|
|
45
|
+
model: str = MODEL_ACTOR,
|
|
46
|
+
max_steps: int = DEFAULT_MAX_STEPS_TASKER,
|
|
47
|
+
temperature: float = DEFAULT_TEMPERATURE,
|
|
48
|
+
reflection_interval: int = DEFAULT_REFLECTION_INTERVAL,
|
|
41
49
|
planner: Planner | None = None,
|
|
42
50
|
step_observer: AsyncObserver | None = None,
|
|
43
|
-
step_delay: float =
|
|
51
|
+
step_delay: float = DEFAULT_STEP_DELAY,
|
|
44
52
|
):
|
|
45
53
|
"""Initialize the tasker agent.
|
|
46
54
|
|
|
@@ -105,6 +113,9 @@ class TaskerAgent(AsyncAgent):
|
|
|
105
113
|
Returns:
|
|
106
114
|
True if all todos completed successfully, False otherwise
|
|
107
115
|
"""
|
|
116
|
+
# Reset handler state at automation start
|
|
117
|
+
reset_handler(action_handler)
|
|
118
|
+
|
|
108
119
|
overall_success = True
|
|
109
120
|
|
|
110
121
|
# Execute todos until none remain
|
oagi/cli/agent.py
CHANGED
|
@@ -14,6 +14,14 @@ import time
|
|
|
14
14
|
import traceback
|
|
15
15
|
|
|
16
16
|
from oagi.agent.observer import AsyncAgentObserver
|
|
17
|
+
from oagi.constants import (
|
|
18
|
+
API_KEY_HELP_URL,
|
|
19
|
+
DEFAULT_BASE_URL,
|
|
20
|
+
DEFAULT_MAX_STEPS_THINKER,
|
|
21
|
+
DEFAULT_STEP_DELAY,
|
|
22
|
+
MODE_ACTOR,
|
|
23
|
+
MODEL_THINKER,
|
|
24
|
+
)
|
|
17
25
|
from oagi.exceptions import check_optional_dependency
|
|
18
26
|
|
|
19
27
|
from .display import display_step_table
|
|
@@ -29,22 +37,30 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
29
37
|
"run", help="Run an agent with the given instruction"
|
|
30
38
|
)
|
|
31
39
|
run_parser.add_argument(
|
|
32
|
-
"instruction",
|
|
40
|
+
"instruction",
|
|
41
|
+
type=str,
|
|
42
|
+
nargs="?",
|
|
43
|
+
default="",
|
|
44
|
+
help="Task instruction for the agent to execute (optional for pre-configured modes)",
|
|
33
45
|
)
|
|
34
46
|
run_parser.add_argument(
|
|
35
|
-
"--model", type=str, help="Model to use (default:
|
|
47
|
+
"--model", type=str, help="Model to use (default: determined by mode)"
|
|
36
48
|
)
|
|
37
49
|
run_parser.add_argument(
|
|
38
|
-
"--max-steps",
|
|
50
|
+
"--max-steps",
|
|
51
|
+
type=int,
|
|
52
|
+
help="Maximum number of steps (default: determined by mode)",
|
|
39
53
|
)
|
|
40
54
|
run_parser.add_argument(
|
|
41
|
-
"--temperature",
|
|
55
|
+
"--temperature",
|
|
56
|
+
type=float,
|
|
57
|
+
help="Sampling temperature (default: determined by mode)",
|
|
42
58
|
)
|
|
43
59
|
run_parser.add_argument(
|
|
44
60
|
"--mode",
|
|
45
61
|
type=str,
|
|
46
|
-
default=
|
|
47
|
-
help="Agent mode to use (default:
|
|
62
|
+
default=MODE_ACTOR,
|
|
63
|
+
help=f"Agent mode to use (default: {MODE_ACTOR}). Use 'oagi agent modes' to list available modes",
|
|
48
64
|
)
|
|
49
65
|
run_parser.add_argument(
|
|
50
66
|
"--oagi-api-key", type=str, help="OAGI API key (default: OAGI_API_KEY env var)"
|
|
@@ -52,7 +68,7 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
52
68
|
run_parser.add_argument(
|
|
53
69
|
"--oagi-base-url",
|
|
54
70
|
type=str,
|
|
55
|
-
help="OAGI base URL (default:
|
|
71
|
+
help=f"OAGI base URL (default: {DEFAULT_BASE_URL}, or OAGI_BASE_URL env var)",
|
|
56
72
|
)
|
|
57
73
|
run_parser.add_argument(
|
|
58
74
|
"--export",
|
|
@@ -68,9 +84,12 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
68
84
|
run_parser.add_argument(
|
|
69
85
|
"--step-delay",
|
|
70
86
|
type=float,
|
|
71
|
-
help="Delay in seconds after each step before next screenshot (default:
|
|
87
|
+
help=f"Delay in seconds after each step before next screenshot (default: {DEFAULT_STEP_DELAY})",
|
|
72
88
|
)
|
|
73
89
|
|
|
90
|
+
# agent modes command
|
|
91
|
+
agent_subparsers.add_parser("modes", help="List available agent modes")
|
|
92
|
+
|
|
74
93
|
# agent permission command
|
|
75
94
|
agent_subparsers.add_parser(
|
|
76
95
|
"permission",
|
|
@@ -81,10 +100,22 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
81
100
|
def handle_agent_command(args: argparse.Namespace) -> None:
|
|
82
101
|
if args.agent_command == "run":
|
|
83
102
|
run_agent(args)
|
|
103
|
+
elif args.agent_command == "modes":
|
|
104
|
+
list_modes()
|
|
84
105
|
elif args.agent_command == "permission":
|
|
85
106
|
check_permissions()
|
|
86
107
|
|
|
87
108
|
|
|
109
|
+
def list_modes() -> None:
|
|
110
|
+
"""List all available agent modes."""
|
|
111
|
+
from oagi.agent import list_agent_modes # noqa: PLC0415
|
|
112
|
+
|
|
113
|
+
modes = list_agent_modes()
|
|
114
|
+
print("Available agent modes:")
|
|
115
|
+
for mode in modes:
|
|
116
|
+
print(f" - {mode}")
|
|
117
|
+
|
|
118
|
+
|
|
88
119
|
def check_permissions() -> None:
|
|
89
120
|
"""Check and request macOS permissions for screen recording and accessibility.
|
|
90
121
|
|
|
@@ -189,19 +220,15 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
189
220
|
if not api_key:
|
|
190
221
|
print(
|
|
191
222
|
"Error: OAGI API key not provided.\n"
|
|
192
|
-
"Set OAGI_API_KEY environment variable or use --oagi-api-key flag
|
|
223
|
+
"Set OAGI_API_KEY environment variable or use --oagi-api-key flag.\n"
|
|
224
|
+
f"Get your API key at {API_KEY_HELP_URL}",
|
|
193
225
|
file=sys.stderr,
|
|
194
226
|
)
|
|
195
227
|
sys.exit(1)
|
|
196
228
|
|
|
197
|
-
base_url = args.oagi_base_url or os.getenv(
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
model = args.model or "lux-actor-1"
|
|
201
|
-
max_steps = args.max_steps or 20
|
|
202
|
-
temperature = args.temperature if args.temperature is not None else 0.5
|
|
203
|
-
mode = args.mode or "actor"
|
|
204
|
-
step_delay = args.step_delay if args.step_delay is not None else 0.3
|
|
229
|
+
base_url = args.oagi_base_url or os.getenv("OAGI_BASE_URL", DEFAULT_BASE_URL)
|
|
230
|
+
mode = args.mode or MODE_ACTOR
|
|
231
|
+
step_delay = args.step_delay if args.step_delay is not None else DEFAULT_STEP_DELAY
|
|
205
232
|
export_format = args.export
|
|
206
233
|
export_file = args.export_file
|
|
207
234
|
|
|
@@ -218,26 +245,38 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
218
245
|
|
|
219
246
|
observer = CombinedObserver()
|
|
220
247
|
|
|
221
|
-
#
|
|
222
|
-
|
|
223
|
-
mode
|
|
224
|
-
api_key
|
|
225
|
-
base_url
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
248
|
+
# Build agent kwargs - only pass explicitly provided values, let factory use defaults
|
|
249
|
+
agent_kwargs = {
|
|
250
|
+
"mode": mode,
|
|
251
|
+
"api_key": api_key,
|
|
252
|
+
"base_url": base_url,
|
|
253
|
+
"step_observer": observer,
|
|
254
|
+
"step_delay": step_delay,
|
|
255
|
+
}
|
|
256
|
+
if args.model:
|
|
257
|
+
agent_kwargs["model"] = args.model
|
|
258
|
+
# If thinker model specified without max_steps, use thinker's default
|
|
259
|
+
if args.model == MODEL_THINKER and not args.max_steps:
|
|
260
|
+
agent_kwargs["max_steps"] = DEFAULT_MAX_STEPS_THINKER
|
|
261
|
+
if args.max_steps:
|
|
262
|
+
agent_kwargs["max_steps"] = args.max_steps
|
|
263
|
+
if args.temperature is not None:
|
|
264
|
+
agent_kwargs["temperature"] = args.temperature
|
|
265
|
+
|
|
266
|
+
# Create agent
|
|
267
|
+
agent = create_agent(**agent_kwargs)
|
|
232
268
|
|
|
233
269
|
# Create handlers
|
|
234
270
|
action_handler = AsyncPyautoguiActionHandler()
|
|
235
271
|
image_provider = AsyncScreenshotMaker()
|
|
236
272
|
|
|
237
|
-
|
|
273
|
+
if args.instruction:
|
|
274
|
+
print(f"Starting agent with instruction: {args.instruction}")
|
|
275
|
+
else:
|
|
276
|
+
print(f"Starting agent with mode: {mode} (using pre-configured instruction)")
|
|
238
277
|
print(
|
|
239
|
-
f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, "
|
|
240
|
-
f"Temperature: {temperature}, Step delay: {step_delay}s"
|
|
278
|
+
f"Mode: {mode}, Model: {agent.model}, Max steps: {agent.max_steps}, "
|
|
279
|
+
f"Temperature: {agent.temperature}, Step delay: {step_delay}s"
|
|
241
280
|
)
|
|
242
281
|
print("-" * 60)
|
|
243
282
|
|
oagi/cli/display.py
CHANGED
|
@@ -29,7 +29,8 @@ def display_step_table(
|
|
|
29
29
|
actions_display = []
|
|
30
30
|
for action in step.actions[:3]:
|
|
31
31
|
arg = action.argument[:20] if action.argument else ""
|
|
32
|
-
|
|
32
|
+
count_str = f" x{action.count}" if action.count and action.count > 1 else ""
|
|
33
|
+
actions_display.append(f"{action.type.value}({arg}){count_str}")
|
|
33
34
|
|
|
34
35
|
actions_str = ", ".join(actions_display)
|
|
35
36
|
if len(step.actions) > 3:
|
oagi/cli/server.py
CHANGED
|
@@ -25,7 +25,7 @@ def add_server_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
25
25
|
start_parser.add_argument(
|
|
26
26
|
"--host",
|
|
27
27
|
type=str,
|
|
28
|
-
help="Server host (default:
|
|
28
|
+
help="Server host (default: 127.0.0.1, or OAGI_SERVER_HOST env var)",
|
|
29
29
|
)
|
|
30
30
|
start_parser.add_argument(
|
|
31
31
|
"--port",
|
oagi/cli/utils.py
CHANGED
|
@@ -11,6 +11,7 @@ import os
|
|
|
11
11
|
import sys
|
|
12
12
|
from importlib.metadata import version as get_version
|
|
13
13
|
|
|
14
|
+
from oagi.constants import DEFAULT_BASE_URL, MODEL_ACTOR
|
|
14
15
|
from oagi.exceptions import check_optional_dependency
|
|
15
16
|
|
|
16
17
|
|
|
@@ -55,10 +56,10 @@ def display_version() -> None:
|
|
|
55
56
|
def display_config() -> None:
|
|
56
57
|
config_vars = {
|
|
57
58
|
"OAGI_API_KEY": os.getenv("OAGI_API_KEY", ""),
|
|
58
|
-
"OAGI_BASE_URL": os.getenv("OAGI_BASE_URL",
|
|
59
|
-
"OAGI_DEFAULT_MODEL": os.getenv("OAGI_DEFAULT_MODEL",
|
|
59
|
+
"OAGI_BASE_URL": os.getenv("OAGI_BASE_URL", DEFAULT_BASE_URL),
|
|
60
|
+
"OAGI_DEFAULT_MODEL": os.getenv("OAGI_DEFAULT_MODEL", MODEL_ACTOR),
|
|
60
61
|
"OAGI_LOG_LEVEL": os.getenv("OAGI_LOG_LEVEL", "INFO"),
|
|
61
|
-
"OAGI_SERVER_HOST": os.getenv("OAGI_SERVER_HOST", "
|
|
62
|
+
"OAGI_SERVER_HOST": os.getenv("OAGI_SERVER_HOST", "127.0.0.1"),
|
|
62
63
|
"OAGI_SERVER_PORT": os.getenv("OAGI_SERVER_PORT", "8000"),
|
|
63
64
|
"OAGI_MAX_STEPS": os.getenv("OAGI_MAX_STEPS", "30"),
|
|
64
65
|
}
|
oagi/client/async_.py
CHANGED
|
@@ -10,6 +10,13 @@ from functools import wraps
|
|
|
10
10
|
|
|
11
11
|
import httpx
|
|
12
12
|
|
|
13
|
+
from ..constants import (
|
|
14
|
+
API_HEALTH_ENDPOINT,
|
|
15
|
+
API_V1_FILE_UPLOAD_ENDPOINT,
|
|
16
|
+
API_V1_GENERATE_ENDPOINT,
|
|
17
|
+
API_V2_MESSAGE_ENDPOINT,
|
|
18
|
+
HTTP_CLIENT_TIMEOUT,
|
|
19
|
+
)
|
|
13
20
|
from ..logging import get_logger
|
|
14
21
|
from ..types import Image
|
|
15
22
|
from ..types.models import GenerateResponse, LLMResponse, UploadFileResponse
|
|
@@ -41,7 +48,7 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
41
48
|
def __init__(self, base_url: str | None = None, api_key: str | None = None):
|
|
42
49
|
super().__init__(base_url, api_key)
|
|
43
50
|
self.client = httpx.AsyncClient(base_url=self.base_url)
|
|
44
|
-
self.upload_client = httpx.AsyncClient(timeout=
|
|
51
|
+
self.upload_client = httpx.AsyncClient(timeout=HTTP_CLIENT_TIMEOUT)
|
|
45
52
|
logger.info(f"AsyncClient initialized with base_url: {self.base_url}")
|
|
46
53
|
|
|
47
54
|
async def __aenter__(self):
|
|
@@ -121,7 +128,10 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
121
128
|
# Make request
|
|
122
129
|
try:
|
|
123
130
|
response = await self.client.post(
|
|
124
|
-
|
|
131
|
+
API_V2_MESSAGE_ENDPOINT,
|
|
132
|
+
json=payload,
|
|
133
|
+
headers=headers,
|
|
134
|
+
timeout=self.timeout,
|
|
125
135
|
)
|
|
126
136
|
return self._process_response(response)
|
|
127
137
|
except (httpx.TimeoutException, httpx.NetworkError) as e:
|
|
@@ -136,7 +146,7 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
136
146
|
"""
|
|
137
147
|
logger.debug("Making async health check request")
|
|
138
148
|
try:
|
|
139
|
-
response = await self.client.get(
|
|
149
|
+
response = await self.client.get(API_HEALTH_ENDPOINT)
|
|
140
150
|
response.raise_for_status()
|
|
141
151
|
result = response.json()
|
|
142
152
|
logger.debug("Async health check successful")
|
|
@@ -158,12 +168,12 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
158
168
|
Returns:
|
|
159
169
|
UploadFileResponse: The response from /v1/file/upload with uuid and presigned S3 URL
|
|
160
170
|
"""
|
|
161
|
-
logger.debug("Making async API request to
|
|
171
|
+
logger.debug(f"Making async API request to {API_V1_FILE_UPLOAD_ENDPOINT}")
|
|
162
172
|
|
|
163
173
|
try:
|
|
164
174
|
headers = self._build_headers(api_version)
|
|
165
175
|
response = await self.client.get(
|
|
166
|
-
|
|
176
|
+
API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
|
|
167
177
|
)
|
|
168
178
|
return self._process_upload_response(response)
|
|
169
179
|
except (httpx.TimeoutException, httpx.NetworkError, httpx.HTTPStatusError) as e:
|
|
@@ -283,7 +293,10 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
283
293
|
# Make request
|
|
284
294
|
try:
|
|
285
295
|
response = await self.client.post(
|
|
286
|
-
|
|
296
|
+
API_V1_GENERATE_ENDPOINT,
|
|
297
|
+
json=payload,
|
|
298
|
+
headers=headers,
|
|
299
|
+
timeout=self.timeout,
|
|
287
300
|
)
|
|
288
301
|
return self._process_generate_response(response)
|
|
289
302
|
except (httpx.TimeoutException, httpx.NetworkError) as e:
|