oagi_core-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. oagi/__init__.py +108 -0
  2. oagi/agent/__init__.py +31 -0
  3. oagi/agent/default.py +75 -0
  4. oagi/agent/factories.py +50 -0
  5. oagi/agent/protocol.py +55 -0
  6. oagi/agent/registry.py +155 -0
  7. oagi/agent/tasker/__init__.py +35 -0
  8. oagi/agent/tasker/memory.py +184 -0
  9. oagi/agent/tasker/models.py +83 -0
  10. oagi/agent/tasker/planner.py +385 -0
  11. oagi/agent/tasker/taskee_agent.py +395 -0
  12. oagi/agent/tasker/tasker_agent.py +323 -0
  13. oagi/async_pyautogui_action_handler.py +44 -0
  14. oagi/async_screenshot_maker.py +47 -0
  15. oagi/async_single_step.py +85 -0
  16. oagi/cli/__init__.py +11 -0
  17. oagi/cli/agent.py +125 -0
  18. oagi/cli/main.py +77 -0
  19. oagi/cli/server.py +94 -0
  20. oagi/cli/utils.py +82 -0
  21. oagi/client/__init__.py +12 -0
  22. oagi/client/async_.py +293 -0
  23. oagi/client/base.py +465 -0
  24. oagi/client/sync.py +296 -0
  25. oagi/exceptions.py +118 -0
  26. oagi/logging.py +47 -0
  27. oagi/pil_image.py +102 -0
  28. oagi/pyautogui_action_handler.py +268 -0
  29. oagi/screenshot_maker.py +41 -0
  30. oagi/server/__init__.py +13 -0
  31. oagi/server/agent_wrappers.py +98 -0
  32. oagi/server/config.py +46 -0
  33. oagi/server/main.py +157 -0
  34. oagi/server/models.py +98 -0
  35. oagi/server/session_store.py +116 -0
  36. oagi/server/socketio_server.py +405 -0
  37. oagi/single_step.py +87 -0
  38. oagi/task/__init__.py +14 -0
  39. oagi/task/async_.py +97 -0
  40. oagi/task/async_short.py +64 -0
  41. oagi/task/base.py +121 -0
  42. oagi/task/short.py +64 -0
  43. oagi/task/sync.py +97 -0
  44. oagi/types/__init__.py +28 -0
  45. oagi/types/action_handler.py +30 -0
  46. oagi/types/async_action_handler.py +30 -0
  47. oagi/types/async_image_provider.py +37 -0
  48. oagi/types/image.py +17 -0
  49. oagi/types/image_provider.py +34 -0
  50. oagi/types/models/__init__.py +32 -0
  51. oagi/types/models/action.py +33 -0
  52. oagi/types/models/client.py +64 -0
  53. oagi/types/models/image_config.py +47 -0
  54. oagi/types/models/step.py +17 -0
  55. oagi/types/url_image.py +47 -0
  56. oagi_core-0.9.0.dist-info/METADATA +257 -0
  57. oagi_core-0.9.0.dist-info/RECORD +60 -0
  58. oagi_core-0.9.0.dist-info/WHEEL +4 -0
  59. oagi_core-0.9.0.dist-info/entry_points.txt +2 -0
  60. oagi_core-0.9.0.dist-info/licenses/LICENSE +21 -0
oagi/agent/tasker/tasker_agent.py ADDED
@@ -0,0 +1,323 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import logging
10
+ from typing import Any
11
+
12
+ from oagi.types import AsyncActionHandler, AsyncImageProvider
13
+
14
+ from ..protocol import AsyncAgent
15
+ from .memory import PlannerMemory
16
+ from .models import TodoStatus
17
+ from .planner import Planner
18
+ from .taskee_agent import TaskeeAgent
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class TaskerAgent(AsyncAgent):
    """Hierarchical agent that manages multi-todo workflows.

    The agent owns a ``PlannerMemory`` holding the task description, the
    todos and the deliverables, and works through the todos one at a time:

    1. ``_prepare`` asks the memory for the current todo and builds a fresh
       ``TaskeeAgent`` that shares this memory.
    2. ``_execute_todo`` runs that taskee agent on the todo description.
    3. ``_update_memory_from_execution`` records the outcome (status,
       history entry, running summary) back into the memory.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        model: str = "lux-v1",
        max_steps: int = 30,
        temperature: float = 0.0,
        reflection_interval: int = 20,
        planner: Planner | None = None,
    ):
        """Initialize the tasker agent.

        Args:
            api_key: OAGI API key, forwarded to each TaskeeAgent
            base_url: OAGI API base URL, forwarded to each TaskeeAgent
            model: Model to use for vision tasks
            max_steps: Maximum steps per todo
            temperature: Sampling temperature
            reflection_interval: Actions before reflection
            planner: Planner for planning and reflection; a default
                ``Planner()`` is created when omitted
        """
        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        # NOTE(review): max_steps is stored here but is not forwarded anywhere
        # in this class; _prepare passes a fixed max_steps_per_subtask=10 to
        # TaskeeAgent. Confirm whether that is intentional.
        self.max_steps = max_steps
        self.temperature = temperature
        self.reflection_interval = reflection_interval
        self.planner = planner or Planner()

        # Memory for tracking the workflow; shared with every taskee agent.
        self.memory = PlannerMemory()

        # Current execution state (-1 means no todo has been prepared yet).
        self.current_taskee_agent: TaskeeAgent | None = None
        self.current_todo_index: int = -1

    def set_task(
        self,
        task: str,
        todos: list[str],
        deliverables: list[str] | None = None,
    ) -> None:
        """Set the task, todos, and deliverables for the workflow.

        Args:
            task: Overall task description
            todos: List of todo descriptions
            deliverables: Optional list of deliverable descriptions
        """
        self.memory.set_task(task, todos, deliverables)
        deliverable_count = len(deliverables) if deliverables else 0
        logger.info(
            f"Task set with {len(todos)} todos and {deliverable_count} deliverables"
        )

    async def execute(
        self,
        instruction: str,
        action_handler: AsyncActionHandler,
        image_provider: AsyncImageProvider,
    ) -> bool:
        """Execute the multi-todo workflow.

        Todos are executed sequentially until the memory reports no current
        todo, or until a failed todo is left IN_PROGRESS.

        Args:
            instruction: Overall instruction; used as the task description
                when no task description has been set on the memory
            action_handler: Handler for executing actions
            image_provider: Provider for capturing screenshots

        Returns:
            True if every executed todo succeeded, False otherwise
        """
        if not self.memory.task_description:
            logger.warning("Task not set, using instruction as task description")
            self.memory.task_description = instruction

        overall_success = True

        # The ``else`` branch only runs when _prepare() returns None, i.e.
        # when the loop ends without hitting the ``break`` below.
        while (prepared := self._prepare()) is not None:
            todo, todo_index = prepared
            logger.info(f"Executing todo {todo_index}: {todo.description}")

            success = await self._execute_todo(
                todo_index,
                action_handler,
                image_provider,
            )
            if success:
                continue

            logger.warning(f"Todo {todo_index} failed")
            overall_success = False
            # A failed todo that is still IN_PROGRESS would be handed out
            # again by the next _prepare() call; stop instead of looping
            # forever on the same todo.
            if self.memory.todos[todo_index].status == TodoStatus.IN_PROGRESS:
                logger.error("Todo failed with exception, stopping execution")
                break
        else:
            logger.info("No more todos to execute")

        # Rebuild the overall summary from the final memory state.
        self._update_task_summary()

        status_summary = self.memory.get_todo_status_summary()
        logger.info(f"Workflow complete. Status summary: {status_summary}")

        return overall_success

    def _prepare(self) -> tuple[Any, int] | None:
        """Prepare for the next todo execution.

        Creates a new TaskeeAgent for the memory's current todo, remembers
        its index and moves a PENDING todo to IN_PROGRESS.

        Returns:
            Tuple of (todo, index) or None if no todos remain
        """
        todo, todo_index = self.memory.get_current_todo()
        if todo is None:
            return None

        # A new taskee agent per todo, sharing this agent's memory.
        self.current_taskee_agent = TaskeeAgent(
            api_key=self.api_key,
            base_url=self.base_url,
            model=self.model,
            max_steps_per_subtask=10,  # Smaller steps per subtask
            reflection_interval=self.reflection_interval,
            temperature=self.temperature,
            planner=self.planner,
            external_memory=self.memory,  # Share memory with child
            todo_index=todo_index,  # Pass the todo index
        )
        self.current_todo_index = todo_index

        if todo.status == TodoStatus.PENDING:
            self.memory.update_todo(todo_index, TodoStatus.IN_PROGRESS)

        logger.info(f"Prepared taskee agent for todo {todo_index}")
        return todo, todo_index

    async def _execute_todo(
        self,
        todo_index: int,
        action_handler: AsyncActionHandler,
        image_provider: AsyncImageProvider,
    ) -> bool:
        """Execute a single todo using the prepared taskee agent.

        Any exception raised while executing or recording the results is
        logged with its traceback and converted into a ``False`` result; the
        todo is then left IN_PROGRESS with a failure summary.

        Args:
            todo_index: Index of the todo to execute
            action_handler: Handler for executing actions
            image_provider: Provider for capturing screenshots

        Returns:
            True if successful, False otherwise
        """
        if not self.current_taskee_agent or todo_index < 0:
            logger.error("No taskee agent prepared")
            return False

        todo = self.memory.todos[todo_index]

        try:
            success = await self.current_taskee_agent.execute(
                todo.description,
                action_handler,
                image_provider,
            )
            results = self.current_taskee_agent.return_execution_results()
            self._update_memory_from_execution(todo_index, results, success)
            return success
        except Exception as e:
            # logger.exception logs at ERROR level like before, but also
            # records the traceback, which is otherwise lost because the
            # exception is swallowed here.
            logger.exception(f"Error executing todo {todo_index}: {e}")
            # Mark as in_progress (not completed)
            self.memory.update_todo(
                todo_index,
                TodoStatus.IN_PROGRESS,
                summary=f"Execution failed: {e}",
            )
            return False

    def _update_memory_from_execution(
        self,
        todo_index: int,
        results: Any,
        success: bool,
    ) -> None:
        """Update memory based on execution results.

        Args:
            todo_index: Index of the executed todo
            results: Execution results from the taskee agent; its ``summary``
                and ``actions`` attributes are read
            success: Whether execution was successful
        """
        # An unsuccessful todo stays IN_PROGRESS rather than being failed.
        status = TodoStatus.COMPLETED if success else TodoStatus.IN_PROGRESS
        self.memory.update_todo(todo_index, status, summary=results.summary)

        self.memory.add_history(
            todo_index,
            results.actions,
            summary=results.summary,
            completed=success,
        )

        # Append a bullet for the completed todo to the running summary.
        if success:
            entry = f"- Completed todo {todo_index}: {results.summary}"
            previous = self.memory.task_execution_summary
            self.memory.task_execution_summary = (
                f"{previous}\n{entry}" if previous else entry
            )

        logger.info(
            f"Updated memory for todo {todo_index}: "
            f"status={status}, actions={len(results.actions)}"
        )

    def _update_task_summary(self) -> None:
        """Replace the task execution summary with a progress overview.

        The overview is a "completed/total" line followed by the completed
        entries among the last three history records, each summary cut to
        100 characters.
        """
        status_summary = self.memory.get_todo_status_summary()
        completed = status_summary.get(TodoStatus.COMPLETED, 0)
        total = len(self.memory.todos)

        summary_parts = [f"Progress: {completed}/{total} todos completed"]
        summary_parts.extend(
            f"- Todo {history.todo_index}: {history.summary[:100]}"
            for history in self.memory.history[-3:]  # Last 3 entries
            if history.completed and history.summary
        )

        self.memory.task_execution_summary = "\n".join(summary_parts)

    def get_memory(self) -> PlannerMemory:
        """Get the current memory state.

        Returns:
            Current PlannerMemory instance
        """
        return self.memory

    def append_todo(self, description: str) -> None:
        """Dynamically append a new todo to the workflow.

        Args:
            description: Description of the new todo
        """
        self.memory.append_todo(description)
        logger.info(f"Appended new todo: {description}")

    def append_deliverable(self, description: str) -> None:
        """Dynamically append a new deliverable to the workflow.

        Args:
            description: Description of the new deliverable
        """
        self.memory.append_deliverable(description)
        logger.info(f"Appended new deliverable: {description}")
oagi/async_pyautogui_action_handler.py ADDED
@@ -0,0 +1,44 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import asyncio
10
+
11
+ from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
12
+ from .types import Action
13
+
14
+
15
class AsyncPyautoguiActionHandler:
    """
    Async wrapper for PyautoguiActionHandler that runs actions in a thread pool.

    This allows PyAutoGUI operations to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while GUI actions are performed.
    """

    def __init__(self, config: PyautoguiConfig | None = None):
        """Initialize with optional configuration.

        Args:
            config: PyautoguiConfig instance for customizing behavior
        """
        # The synchronous handler receives the config exactly as passed
        # (possibly None); self.config falls back to a default instance.
        self.sync_handler = PyautoguiActionHandler(config=config)
        self.config = config or PyautoguiConfig()

    async def __call__(self, actions: list[Action]) -> None:
        """
        Execute actions asynchronously using a thread pool executor.

        The synchronous handler is run on the event loop's default executor
        so that it does not block the loop while GUI actions are performed.

        Args:
            actions: List of actions to execute
        """
        # get_running_loop() is the documented way to obtain the loop from
        # inside a coroutine; get_event_loop() has more complex, policy-
        # dependent behaviour and is discouraged here.
        loop = asyncio.get_running_loop()
        # Passing None selects the loop's default executor.
        await loop.run_in_executor(None, self.sync_handler, actions)
oagi/async_screenshot_maker.py ADDED
@@ -0,0 +1,47 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import asyncio
10
+
11
+ from .screenshot_maker import ScreenshotMaker
12
+ from .types import Image, ImageConfig
13
+
14
+
15
class AsyncScreenshotMaker:
    """
    Async wrapper for ScreenshotMaker that captures screenshots in a thread pool.

    This allows screenshot capture to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while screenshots are taken.
    """

    def __init__(self, config: ImageConfig | None = None):
        """Initialize with optional image configuration.

        Args:
            config: ImageConfig instance for customizing screenshot format and quality
        """
        self.sync_screenshot_maker = ScreenshotMaker(config=config)
        self.config = config

    async def __call__(self) -> Image:
        """
        Capture a screenshot asynchronously using a thread pool executor.

        The synchronous screenshot maker is run on the event loop's default
        executor so that the capture does not block the loop.

        Returns:
            Image: Whatever the wrapped synchronous screenshot maker returns
        """
        # get_running_loop() is the documented way to obtain the loop from
        # inside a coroutine; get_event_loop() has more complex, policy-
        # dependent behaviour and is discouraged here.
        loop = asyncio.get_running_loop()
        # Passing None selects the loop's default executor.
        return await loop.run_in_executor(None, self.sync_screenshot_maker)

    async def last_image(self) -> Image:
        """Return the wrapped maker's ``last_image()`` result.

        The call is made directly on the event loop thread; no executor is
        involved.
        """
        return self.sync_screenshot_maker.last_image()
oagi/async_single_step.py ADDED
@@ -0,0 +1,85 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from pathlib import Path
10
+
11
+ from .task import AsyncTask
12
+ from .types import Image, Step
13
+
14
+
15
async def async_single_step(
    task_description: str,
    screenshot: str | bytes | Path | Image,
    instruction: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    temperature: float | None = None,
) -> Step:
    """
    Run one inference step asynchronously on a throwaway task.

    A temporary ``AsyncTask`` is created, initialised with the task
    description, asked for a single step on the given screenshot, and closed
    again, so no task state survives the call.

    Args:
        task_description: Description of the task to perform
        screenshot: Screenshot as an Image, raw bytes, or a file path
            (``str`` or ``Path``)
        instruction: Optional additional instruction for the step
        api_key: OAGI API key, passed through to ``AsyncTask``
        base_url: OAGI base URL, passed through to ``AsyncTask``
        temperature: Sampling temperature, passed through to ``AsyncTask``

    Returns:
        Step: The result of the single ``AsyncTask.step`` call

    Example:
        >>> import asyncio
        >>> async def main():
        ...     step = await async_single_step(
        ...         "Click the submit button",
        ...         screenshot="screenshot.png",
        ...     )
        ...     print(f"Actions: {step.actions}")
        >>> asyncio.run(main())
    """
    # Imported lazily so the image backend is only loaded when this is called.
    from .pil_image import PILImage  # noqa: PLC0415

    # Normalise paths and raw bytes into a PILImage; anything else is passed
    # to the task unchanged.
    if isinstance(screenshot, str | Path):
        screenshot = PILImage.from_file(str(screenshot))
    elif isinstance(screenshot, bytes):
        screenshot = PILImage.from_bytes(screenshot)

    one_shot_task = AsyncTask(
        api_key=api_key, base_url=base_url, temperature=temperature
    )

    try:
        await one_shot_task.init_task(task_description)
        return await one_shot_task.step(screenshot, instruction=instruction)
    finally:
        # Always release the task's resources, even when a call above raised.
        await one_shot_task.close()
oagi/cli/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from oagi.cli.main import main
10
+
11
+ __all__ = ["main"]
oagi/cli/agent.py ADDED
@@ -0,0 +1,125 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import argparse
10
+ import asyncio
11
+ import os
12
+ import sys
13
+
14
+ from oagi.exceptions import check_optional_dependency
15
+
16
+
17
def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``agent`` command and its ``run`` sub-command.

    Args:
        subparsers: Sub-parser action of the parent parser to attach the
            ``agent`` command to. The chosen sub-command is stored in
            ``agent_command`` and is required.
    """
    agent_cmd = subparsers.add_parser("agent", help="Agent execution commands")
    agent_actions = agent_cmd.add_subparsers(dest="agent_command", required=True)

    # agent run command
    run_cmd = agent_actions.add_parser(
        "run", help="Run an agent with the given instruction"
    )

    # (flag, add_argument keyword arguments), registered in this order so the
    # generated help keeps the same layout.
    argument_specs = (
        (
            "instruction",
            {"type": str, "help": "Task instruction for the agent to execute"},
        ),
        ("--model", {"type": str, "help": "Model to use (default: lux-v1)"}),
        (
            "--max-steps",
            {"type": int, "help": "Maximum number of steps (default: 30)"},
        ),
        (
            "--temperature",
            {"type": float, "help": "Sampling temperature (default: 0.0)"},
        ),
        (
            "--mode",
            {
                "type": str,
                "default": "actor",
                "help": "Agent mode to use (default: actor). Available modes: actor, planner",
            },
        ),
        (
            "--oagi-api-key",
            {"type": str, "help": "OAGI API key (default: OAGI_API_KEY env var)"},
        ),
        (
            "--oagi-base-url",
            {
                "type": str,
                "help": "OAGI base URL (default: https://api.agiopen.org, or OAGI_BASE_URL env var)",
            },
        ),
    )
    for flag, options in argument_specs:
        run_cmd.add_argument(flag, **options)
49
+
50
+
51
def handle_agent_command(args: argparse.Namespace) -> None:
    """Dispatch a parsed ``agent`` command.

    Only ``agent_command == "run"`` is handled; any other value is ignored.

    Args:
        args: Parsed command-line arguments
    """
    if args.agent_command != "run":
        return
    run_agent(args)
54
+
55
+
56
def run_agent(args: argparse.Namespace) -> None:
    """Run an agent from parsed CLI arguments and exit the process.

    The function always terminates via ``sys.exit``: 0 when the agent reports
    success, 1 when the API key is missing, the agent reports failure or an
    exception is raised, and 130 on KeyboardInterrupt.

    Args:
        args: Parsed arguments of the ``agent run`` command
    """
    # Check if desktop extras are installed
    for module_name in ("pyautogui", "PIL"):
        check_optional_dependency(module_name, "Agent execution", "desktop")

    from oagi import AsyncPyautoguiActionHandler, AsyncScreenshotMaker  # noqa: PLC0415
    from oagi.agent import create_agent  # noqa: PLC0415

    # Command-line flag first, environment variable second.
    api_key = args.oagi_api_key or os.getenv("OAGI_API_KEY")
    if not api_key:
        print(
            "Error: OAGI API key not provided.\n"
            "Set OAGI_API_KEY environment variable or use --oagi-api-key flag.",
            file=sys.stderr,
        )
        sys.exit(1)

    base_url = args.oagi_base_url or os.getenv(
        "OAGI_BASE_URL", "https://api.agiopen.org"
    )
    model = args.model or "lux-v1"
    max_steps = args.max_steps or 30
    # An explicit 0.0 must be kept, hence the None check rather than ``or``.
    temperature = 0.0 if args.temperature is None else args.temperature
    mode = args.mode or "actor"

    agent = create_agent(
        mode=mode,
        api_key=api_key,
        base_url=base_url,
        model=model,
        max_steps=max_steps,
        temperature=temperature,
    )

    action_handler = AsyncPyautoguiActionHandler()
    image_provider = AsyncScreenshotMaker()

    separator = "-" * 60
    print(f"Starting agent with instruction: {args.instruction}")
    print(
        f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, Temperature: {temperature}"
    )
    print(separator)

    try:
        success = asyncio.run(
            agent.execute(
                instruction=args.instruction,
                action_handler=action_handler,
                image_provider=image_provider,
            )
        )

        print(separator)
        if success:
            print("Task completed successfully!")
            sys.exit(0)

        print("Task failed or reached max steps without completion.")
        sys.exit(1)

    except KeyboardInterrupt:
        print("\nAgent execution interrupted.")
        sys.exit(130)
    except Exception as e:
        # SystemExit raised above is not an Exception subclass, so the exit
        # codes chosen in the try block are not intercepted here.
        print(f"Error during agent execution: {e}", file=sys.stderr)
        sys.exit(1)