oagi-core 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +108 -0
- oagi/agent/__init__.py +31 -0
- oagi/agent/default.py +75 -0
- oagi/agent/factories.py +50 -0
- oagi/agent/protocol.py +55 -0
- oagi/agent/registry.py +155 -0
- oagi/agent/tasker/__init__.py +35 -0
- oagi/agent/tasker/memory.py +184 -0
- oagi/agent/tasker/models.py +83 -0
- oagi/agent/tasker/planner.py +385 -0
- oagi/agent/tasker/taskee_agent.py +395 -0
- oagi/agent/tasker/tasker_agent.py +323 -0
- oagi/async_pyautogui_action_handler.py +44 -0
- oagi/async_screenshot_maker.py +47 -0
- oagi/async_single_step.py +85 -0
- oagi/cli/__init__.py +11 -0
- oagi/cli/agent.py +125 -0
- oagi/cli/main.py +77 -0
- oagi/cli/server.py +94 -0
- oagi/cli/utils.py +82 -0
- oagi/client/__init__.py +12 -0
- oagi/client/async_.py +293 -0
- oagi/client/base.py +465 -0
- oagi/client/sync.py +296 -0
- oagi/exceptions.py +118 -0
- oagi/logging.py +47 -0
- oagi/pil_image.py +102 -0
- oagi/pyautogui_action_handler.py +268 -0
- oagi/screenshot_maker.py +41 -0
- oagi/server/__init__.py +13 -0
- oagi/server/agent_wrappers.py +98 -0
- oagi/server/config.py +46 -0
- oagi/server/main.py +157 -0
- oagi/server/models.py +98 -0
- oagi/server/session_store.py +116 -0
- oagi/server/socketio_server.py +405 -0
- oagi/single_step.py +87 -0
- oagi/task/__init__.py +14 -0
- oagi/task/async_.py +97 -0
- oagi/task/async_short.py +64 -0
- oagi/task/base.py +121 -0
- oagi/task/short.py +64 -0
- oagi/task/sync.py +97 -0
- oagi/types/__init__.py +28 -0
- oagi/types/action_handler.py +30 -0
- oagi/types/async_action_handler.py +30 -0
- oagi/types/async_image_provider.py +37 -0
- oagi/types/image.py +17 -0
- oagi/types/image_provider.py +34 -0
- oagi/types/models/__init__.py +32 -0
- oagi/types/models/action.py +33 -0
- oagi/types/models/client.py +64 -0
- oagi/types/models/image_config.py +47 -0
- oagi/types/models/step.py +17 -0
- oagi/types/url_image.py +47 -0
- oagi_core-0.9.0.dist-info/METADATA +257 -0
- oagi_core-0.9.0.dist-info/RECORD +60 -0
- oagi_core-0.9.0.dist-info/WHEEL +4 -0
- oagi_core-0.9.0.dist-info/entry_points.txt +2 -0
- oagi_core-0.9.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from oagi.types import AsyncActionHandler, AsyncImageProvider
|
|
13
|
+
|
|
14
|
+
from ..protocol import AsyncAgent
|
|
15
|
+
from .memory import PlannerMemory
|
|
16
|
+
from .models import TodoStatus
|
|
17
|
+
from .planner import Planner
|
|
18
|
+
from .taskee_agent import TaskeeAgent
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TaskerAgent(AsyncAgent):
    """Hierarchical agent that manages multi-todo workflows.

    This agent orchestrates the execution of multiple todos by:
    1. Managing a workflow with todos and deliverables
    2. Executing todos sequentially using TaskeeAgent
    3. Tracking progress and updating memory
    4. Sharing context between todos for informed execution
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        model: str = "lux-v1",
        max_steps: int = 30,
        temperature: float = 0.0,
        reflection_interval: int = 20,
        planner: Planner | None = None,
    ):
        """Initialize the tasker agent.

        Args:
            api_key: OAGI API key
            base_url: OAGI API base URL
            model: Model to use for vision tasks
            max_steps: Maximum steps per todo (stored on the instance; this
                class does not forward it to the TaskeeAgent it creates)
            temperature: Sampling temperature
            reflection_interval: Actions before reflection
            planner: Planner for planning and reflection; a default
                ``Planner()`` is created when omitted
        """
        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        self.max_steps = max_steps
        self.temperature = temperature
        self.reflection_interval = reflection_interval
        self.planner = planner or Planner()

        # Memory for tracking workflow
        self.memory = PlannerMemory()

        # Current execution state
        self.current_taskee_agent: TaskeeAgent | None = None
        self.current_todo_index: int = -1

    def set_task(
        self,
        task: str,
        todos: list[str],
        deliverables: list[str] | None = None,
    ) -> None:
        """Set the task, todos, and deliverables for the workflow.

        Args:
            task: Overall task description
            todos: List of todo descriptions
            deliverables: Optional list of deliverable descriptions
        """
        self.memory.set_task(task, todos, deliverables)
        logger.info(
            f"Task set with {len(todos)} todos and "
            f"{len(deliverables) if deliverables else 0} deliverables"
        )

    async def execute(
        self,
        instruction: str,
        action_handler: AsyncActionHandler,
        image_provider: AsyncImageProvider,
    ) -> bool:
        """Execute the multi-todo workflow.

        This method will execute todos sequentially until all are complete
        or a failure occurs.

        Args:
            instruction: Overall instruction (can be same as task description)
            action_handler: Handler for executing actions
            image_provider: Provider for capturing screenshots

        Returns:
            True if all todos completed successfully, False otherwise
        """
        # If task not set, use instruction as task description
        if not self.memory.task_description:
            logger.warning("Task not set, using instruction as task description")
            self.memory.task_description = instruction

        overall_success = True

        # Execute todos until none remain
        while True:
            # Prepare for next todo
            todo_info = self._prepare()

            if todo_info is None:
                # No more todos to execute
                logger.info("No more todos to execute")
                break

            todo, todo_index = todo_info
            logger.info(f"Executing todo {todo_index}: {todo.description}")

            # Execute the todo
            success = await self._execute_todo(
                todo_index,
                action_handler,
                image_provider,
            )

            if not success:
                logger.warning(f"Todo {todo_index} failed")
                overall_success = False
                # If todo failed due to exception, it stays IN_PROGRESS
                # Break to avoid infinite loop re-attempting same todo
                # NOTE(review): _update_memory_from_execution also records
                # IN_PROGRESS for a non-exception failure, so this branch
                # appears to stop on any failed todo — confirm intent.
                current_status = self.memory.todos[todo_index].status
                if current_status == TodoStatus.IN_PROGRESS:
                    logger.error("Todo failed with exception, stopping execution")
                    break
                # Otherwise continue with next todo

            # Update task execution summary
            self._update_task_summary()

        # Log final status
        status_summary = self.memory.get_todo_status_summary()
        logger.info(f"Workflow complete. Status summary: {status_summary}")

        return overall_success

    def _prepare(self) -> tuple[Any, int] | None:
        """Prepare for the next todo execution.

        Creates a fresh TaskeeAgent bound to the shared memory, records the
        todo index on the instance, and promotes a PENDING todo to
        IN_PROGRESS.

        Returns:
            Tuple of (todo, index) or None if no todos remain
        """
        # Get current todo
        todo, todo_index = self.memory.get_current_todo()

        if todo is None:
            return None

        # Create taskee agent with external memory
        # NOTE(review): self.max_steps is stored by __init__ but is not
        # forwarded here; the per-subtask budget is fixed at 10 — confirm
        # this is intended.
        self.current_taskee_agent = TaskeeAgent(
            api_key=self.api_key,
            base_url=self.base_url,
            model=self.model,
            max_steps_per_subtask=10,  # Smaller steps per subtask
            reflection_interval=self.reflection_interval,
            temperature=self.temperature,
            planner=self.planner,
            external_memory=self.memory,  # Share memory with child
            todo_index=todo_index,  # Pass the todo index
        )

        self.current_todo_index = todo_index

        # Update todo status to in_progress if it was pending
        if todo.status == TodoStatus.PENDING:
            self.memory.update_todo(todo_index, TodoStatus.IN_PROGRESS)

        logger.info(f"Prepared taskee agent for todo {todo_index}")

        return todo, todo_index

    async def _execute_todo(
        self,
        todo_index: int,
        action_handler: AsyncActionHandler,
        image_provider: AsyncImageProvider,
    ) -> bool:
        """Execute a single todo using the todo agent.

        Any exception raised while the taskee agent runs (or while its
        results are written back to memory) is logged with its traceback,
        recorded in the todo's summary, and reported as a failure rather
        than propagated.

        Args:
            todo_index: Index of the todo to execute
            action_handler: Handler for executing actions
            image_provider: Provider for capturing screenshots

        Returns:
            True if successful, False otherwise
        """
        if not self.current_taskee_agent or todo_index < 0:
            logger.error("No taskee agent prepared")
            return False

        todo = self.memory.todos[todo_index]

        try:
            # Execute using taskee agent
            success = await self.current_taskee_agent.execute(
                todo.description,
                action_handler,
                image_provider,
            )

            # Get execution results
            results = self.current_taskee_agent.return_execution_results()

            # Update memory with results
            self._update_memory_from_execution(todo_index, results, success)

            return success

        except Exception as e:
            # logger.exception keeps the traceback, which a plain
            # logger.error(f"...{e}") would discard.
            logger.exception(f"Error executing todo {todo_index}: {e}")
            # Mark as in_progress (not completed)
            self.memory.update_todo(
                todo_index,
                TodoStatus.IN_PROGRESS,
                summary=f"Execution failed: {e}",
            )
            return False

    def _update_memory_from_execution(
        self,
        todo_index: int,
        results: Any,
        success: bool,
    ) -> None:
        """Update memory based on execution results.

        Args:
            todo_index: Index of the executed todo
            results: Execution results from todo agent; this method reads
                its ``summary`` and ``actions`` attributes
            success: Whether execution was successful
        """
        # Update todo status
        status = TodoStatus.COMPLETED if success else TodoStatus.IN_PROGRESS
        self.memory.update_todo(
            todo_index,
            status,
            summary=results.summary,
        )

        # Add to history
        self.memory.add_history(
            todo_index,
            results.actions,
            summary=results.summary,
            completed=success,
        )

        # Update task execution summary
        if success:
            if self.memory.task_execution_summary:
                self.memory.task_execution_summary += (
                    f"\n- Completed todo {todo_index}: {results.summary}"
                )
            else:
                self.memory.task_execution_summary = (
                    f"- Completed todo {todo_index}: {results.summary}"
                )

        logger.info(
            f"Updated memory for todo {todo_index}: "
            f"status={status}, actions={len(results.actions)}"
        )

    def _update_task_summary(self) -> None:
        """Update the overall task execution summary.

        Replaces ``memory.task_execution_summary`` with a progress line plus
        up to three of the most recent completed history entries, each
        summary truncated to 100 characters.
        """
        status_summary = self.memory.get_todo_status_summary()
        completed = status_summary.get(TodoStatus.COMPLETED, 0)
        total = len(self.memory.todos)

        summary_parts = [f"Progress: {completed}/{total} todos completed"]

        # Add recent completions
        for history in self.memory.history[-3:]:  # Last 3 entries
            if history.completed and history.summary:
                summary_parts.append(
                    f"- Todo {history.todo_index}: {history.summary[:100]}"
                )

        self.memory.task_execution_summary = "\n".join(summary_parts)

    def get_memory(self) -> PlannerMemory:
        """Get the current memory state.

        Returns:
            Current PlannerMemory instance
        """
        return self.memory

    def append_todo(self, description: str) -> None:
        """Dynamically append a new todo to the workflow.

        Args:
            description: Description of the new todo
        """
        self.memory.append_todo(description)
        logger.info(f"Appended new todo: {description}")

    def append_deliverable(self, description: str) -> None:
        """Dynamically append a new deliverable to the workflow.

        Args:
            description: Description of the new deliverable
        """
        self.memory.append_deliverable(description)
        logger.info(f"Appended new deliverable: {description}")
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
|
|
11
|
+
from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
|
|
12
|
+
from .types import Action
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AsyncPyautoguiActionHandler:
    """
    Async wrapper for PyautoguiActionHandler that runs actions in a thread pool.

    This allows PyAutoGUI operations to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while GUI actions are performed.
    """

    def __init__(self, config: PyautoguiConfig | None = None):
        """Initialize with optional configuration.

        Args:
            config: PyautoguiConfig instance for customizing behavior; when
                omitted, a default ``PyautoguiConfig()`` is stored on
                ``self.config``
        """
        self.sync_handler = PyautoguiActionHandler(config=config)
        self.config = config or PyautoguiConfig()

    async def __call__(self, actions: list[Action]) -> None:
        """
        Execute actions asynchronously using a thread pool executor.

        This prevents PyAutoGUI operations from blocking the async event loop,
        allowing other coroutines to run while GUI actions are being performed.

        Args:
            actions: List of actions to execute
        """
        # A coroutine always runs inside a loop, so ask for the running loop
        # directly instead of going through the get_event_loop() policy lookup.
        loop = asyncio.get_running_loop()
        # Run the synchronous handler in a thread pool to avoid blocking
        await loop.run_in_executor(None, self.sync_handler, actions)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
|
|
11
|
+
from .screenshot_maker import ScreenshotMaker
|
|
12
|
+
from .types import Image, ImageConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AsyncScreenshotMaker:
    """
    Async wrapper for ScreenshotMaker that captures screenshots in a thread pool.

    This allows screenshot capture to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while screenshots are taken.
    """

    def __init__(self, config: ImageConfig | None = None):
        """Initialize with optional image configuration.

        Args:
            config: ImageConfig instance for customizing screenshot format and quality
        """
        self.sync_screenshot_maker = ScreenshotMaker(config=config)
        self.config = config

    async def __call__(self) -> Image:
        """
        Capture a screenshot asynchronously using a thread pool executor.

        This prevents screenshot capture from blocking the async event loop,
        allowing other coroutines to run while the screenshot is being taken.

        Returns:
            Image: The captured screenshot as a PILImage
        """
        # A coroutine always runs inside a loop, so ask for the running loop
        # directly instead of going through the get_event_loop() policy lookup.
        loop = asyncio.get_running_loop()
        # Run the synchronous screenshot capture in a thread pool to avoid blocking
        return await loop.run_in_executor(None, self.sync_screenshot_maker)

    async def last_image(self) -> Image:
        """Return whatever the wrapped ScreenshotMaker's ``last_image()`` returns.

        The wrapped call is made directly on the calling thread, not in the
        executor.
        """
        return self.sync_screenshot_maker.last_image()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .task import AsyncTask
|
|
12
|
+
from .types import Image, Step
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def async_single_step(
    task_description: str,
    screenshot: str | bytes | Path | Image,
    instruction: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    temperature: float | None = None,
) -> Step:
    """
    Run one stateless inference step against a single screenshot.

    A throwaway AsyncTask is created, initialised with the task description,
    stepped once, and closed again, so nothing is carried over between calls.

    Args:
        task_description: Description of the task to perform
        screenshot: Screenshot given as a file path (str or Path), raw bytes,
            or an Image; paths and bytes are converted to a PILImage first
        instruction: Optional additional instruction passed to the step
        api_key: OAGI API key, forwarded to AsyncTask
        base_url: OAGI base URL, forwarded to AsyncTask
        temperature: Sampling temperature, forwarded to AsyncTask

    Returns:
        Step: The result of the single ``task.step`` call

    Example:
        >>> # From raw bytes
        >>> import asyncio
        >>> async def main():
        ...     with open("screenshot.png", "rb") as f:
        ...         screenshot_bytes = f.read()
        ...     step = await async_single_step(
        ...         "Click the submit button",
        ...         screenshot=screenshot_bytes
        ...     )
        ...     print(f"Actions: {step.actions}")
        >>> asyncio.run(main())

        >>> # From a file path
        >>> step = await async_single_step(
        ...     "Find the search box",
        ...     screenshot="screenshot.png"
        ... )

        >>> # From a PILImage
        >>> image = PILImage.from_file("screenshot.png")
        >>> step = await async_single_step(
        ...     "Click next page",
        ...     screenshot=image
        ... )
    """
    # PILImage is imported lazily, only when this function is called
    from .pil_image import PILImage  # noqa: PLC0415

    # Normalise raw bytes and filesystem paths; anything else passes through
    if isinstance(screenshot, bytes):
        screenshot = PILImage.from_bytes(screenshot)
    elif isinstance(screenshot, (str, Path)):
        screenshot = PILImage.from_file(str(screenshot))

    # Short-lived task used for exactly one step
    one_shot_task = AsyncTask(
        api_key=api_key,
        base_url=base_url,
        temperature=temperature,
    )

    try:
        await one_shot_task.init_task(task_description)
        return await one_shot_task.step(screenshot, instruction=instruction)
    finally:
        # Always release the task, whether or not the step succeeded
        await one_shot_task.close()
|
oagi/cli/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
from oagi.cli.main import main
|
|
10
|
+
|
|
11
|
+
__all__ = ["main"]
|
oagi/cli/agent.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import asyncio
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from oagi.exceptions import check_optional_dependency
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``agent`` command and its ``run`` sub-command.

    Args:
        subparsers: Sub-parser collection of the parent CLI parser to which
            the ``agent`` parser is added.
    """
    agent_cmd = subparsers.add_parser("agent", help="Agent execution commands")
    agent_actions = agent_cmd.add_subparsers(dest="agent_command", required=True)

    # agent run command
    run_cmd = agent_actions.add_parser(
        "run",
        help="Run an agent with the given instruction",
    )

    # (name, keyword arguments) pairs, registered in this order
    argument_table = (
        (
            "instruction",
            {"type": str, "help": "Task instruction for the agent to execute"},
        ),
        ("--model", {"type": str, "help": "Model to use (default: lux-v1)"}),
        (
            "--max-steps",
            {"type": int, "help": "Maximum number of steps (default: 30)"},
        ),
        (
            "--temperature",
            {"type": float, "help": "Sampling temperature (default: 0.0)"},
        ),
        (
            "--mode",
            {
                "type": str,
                "default": "actor",
                "help": "Agent mode to use (default: actor). Available modes: actor, planner",
            },
        ),
        (
            "--oagi-api-key",
            {"type": str, "help": "OAGI API key (default: OAGI_API_KEY env var)"},
        ),
        (
            "--oagi-base-url",
            {
                "type": str,
                "help": "OAGI base URL (default: https://api.agiopen.org, or OAGI_BASE_URL env var)",
            },
        ),
    )
    for arg_name, arg_options in argument_table:
        run_cmd.add_argument(arg_name, **arg_options)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def handle_agent_command(args: argparse.Namespace) -> None:
    """Dispatch a parsed ``agent`` sub-command.

    Only ``run`` is handled; any other ``agent_command`` value is a no-op.

    Args:
        args: Parsed CLI namespace carrying ``agent_command``.
    """
    if args.agent_command != "run":
        return
    run_agent(args)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def run_agent(args: argparse.Namespace) -> None:
    """Run an agent from parsed CLI arguments and exit with a status code.

    Exits the process with 0 when the agent reports success, 1 when the API
    key is missing, the agent reports failure, or an exception is raised,
    and 130 when interrupted from the keyboard.

    Args:
        args: Parsed CLI namespace; reads ``instruction``, ``model``,
            ``max_steps``, ``temperature``, ``mode``, ``oagi_api_key`` and
            ``oagi_base_url``.
    """
    # Verify the desktop extras before importing anything that needs them
    for module_name in ("pyautogui", "PIL"):
        check_optional_dependency(module_name, "Agent execution", "desktop")

    from oagi import AsyncPyautoguiActionHandler, AsyncScreenshotMaker  # noqa: PLC0415
    from oagi.agent import create_agent  # noqa: PLC0415

    # Resolve the API key: command-line flag first, then environment
    api_key = args.oagi_api_key
    if not api_key:
        api_key = os.getenv("OAGI_API_KEY")
    if not api_key:
        print(
            "Error: OAGI API key not provided.\n"
            "Set OAGI_API_KEY environment variable or use --oagi-api-key flag.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Resolve the remaining settings, falling back to the CLI defaults
    base_url = args.oagi_base_url
    if not base_url:
        base_url = os.getenv("OAGI_BASE_URL", "https://api.agiopen.org")
    model = args.model if args.model else "lux-v1"
    max_steps = args.max_steps if args.max_steps else 30
    if args.temperature is None:
        temperature = 0.0
    else:
        temperature = args.temperature
    mode = args.mode if args.mode else "actor"

    # Build the agent
    agent = create_agent(
        mode=mode,
        api_key=api_key,
        base_url=base_url,
        model=model,
        max_steps=max_steps,
        temperature=temperature,
    )

    # Build the handlers the agent acts and observes through
    action_handler = AsyncPyautoguiActionHandler()
    image_provider = AsyncScreenshotMaker()

    separator = "-" * 60
    print(f"Starting agent with instruction: {args.instruction}")
    print(
        f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, Temperature: {temperature}"
    )
    print(separator)

    # Drive the agent to completion and translate the outcome to an exit code
    try:
        run_coroutine = agent.execute(
            instruction=args.instruction,
            action_handler=action_handler,
            image_provider=image_provider,
        )
        succeeded = asyncio.run(run_coroutine)

        print(separator)
        if not succeeded:
            print("Task failed or reached max steps without completion.")
            sys.exit(1)
        print("Task completed successfully!")
        sys.exit(0)

    except KeyboardInterrupt:
        print("\nAgent execution interrupted.")
        sys.exit(130)
    except Exception as e:
        print(f"Error during agent execution: {e}", file=sys.stderr)
        sys.exit(1)
|