oagi-core 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. oagi/__init__.py +148 -0
  2. oagi/agent/__init__.py +33 -0
  3. oagi/agent/default.py +124 -0
  4. oagi/agent/factories.py +74 -0
  5. oagi/agent/observer/__init__.py +38 -0
  6. oagi/agent/observer/agent_observer.py +99 -0
  7. oagi/agent/observer/events.py +28 -0
  8. oagi/agent/observer/exporters.py +445 -0
  9. oagi/agent/observer/protocol.py +12 -0
  10. oagi/agent/protocol.py +55 -0
  11. oagi/agent/registry.py +155 -0
  12. oagi/agent/tasker/__init__.py +33 -0
  13. oagi/agent/tasker/memory.py +160 -0
  14. oagi/agent/tasker/models.py +77 -0
  15. oagi/agent/tasker/planner.py +408 -0
  16. oagi/agent/tasker/taskee_agent.py +512 -0
  17. oagi/agent/tasker/tasker_agent.py +324 -0
  18. oagi/cli/__init__.py +11 -0
  19. oagi/cli/agent.py +281 -0
  20. oagi/cli/display.py +56 -0
  21. oagi/cli/main.py +77 -0
  22. oagi/cli/server.py +94 -0
  23. oagi/cli/tracking.py +55 -0
  24. oagi/cli/utils.py +89 -0
  25. oagi/client/__init__.py +12 -0
  26. oagi/client/async_.py +290 -0
  27. oagi/client/base.py +457 -0
  28. oagi/client/sync.py +293 -0
  29. oagi/exceptions.py +118 -0
  30. oagi/handler/__init__.py +24 -0
  31. oagi/handler/_macos.py +55 -0
  32. oagi/handler/async_pyautogui_action_handler.py +44 -0
  33. oagi/handler/async_screenshot_maker.py +47 -0
  34. oagi/handler/pil_image.py +102 -0
  35. oagi/handler/pyautogui_action_handler.py +291 -0
  36. oagi/handler/screenshot_maker.py +41 -0
  37. oagi/logging.py +55 -0
  38. oagi/server/__init__.py +13 -0
  39. oagi/server/agent_wrappers.py +98 -0
  40. oagi/server/config.py +46 -0
  41. oagi/server/main.py +157 -0
  42. oagi/server/models.py +98 -0
  43. oagi/server/session_store.py +116 -0
  44. oagi/server/socketio_server.py +405 -0
  45. oagi/task/__init__.py +21 -0
  46. oagi/task/async_.py +101 -0
  47. oagi/task/async_short.py +76 -0
  48. oagi/task/base.py +157 -0
  49. oagi/task/short.py +76 -0
  50. oagi/task/sync.py +99 -0
  51. oagi/types/__init__.py +50 -0
  52. oagi/types/action_handler.py +30 -0
  53. oagi/types/async_action_handler.py +30 -0
  54. oagi/types/async_image_provider.py +38 -0
  55. oagi/types/image.py +17 -0
  56. oagi/types/image_provider.py +35 -0
  57. oagi/types/models/__init__.py +32 -0
  58. oagi/types/models/action.py +33 -0
  59. oagi/types/models/client.py +68 -0
  60. oagi/types/models/image_config.py +47 -0
  61. oagi/types/models/step.py +17 -0
  62. oagi/types/step_observer.py +93 -0
  63. oagi/types/url.py +3 -0
  64. oagi_core-0.10.1.dist-info/METADATA +245 -0
  65. oagi_core-0.10.1.dist-info/RECORD +68 -0
  66. oagi_core-0.10.1.dist-info/WHEEL +4 -0
  67. oagi_core-0.10.1.dist-info/entry_points.txt +2 -0
  68. oagi_core-0.10.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,512 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import logging
10
+ from datetime import datetime
11
+ from typing import Any
12
+
13
+ from oagi import AsyncActor
14
+ from oagi.types import (
15
+ URL,
16
+ ActionEvent,
17
+ AsyncActionHandler,
18
+ AsyncImageProvider,
19
+ AsyncObserver,
20
+ Image,
21
+ PlanEvent,
22
+ StepEvent,
23
+ )
24
+
25
+ from ..protocol import AsyncAgent
26
+ from .memory import PlannerMemory
27
+ from .models import Action, ExecutionResult
28
+ from .planner import Planner
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ def _serialize_image(image: Image | str) -> bytes | str:
34
+ """Convert an image to bytes or keep URL as string."""
35
+ if isinstance(image, str):
36
+ return image
37
+ return image.read()
38
+
39
+
40
class TaskeeAgent(AsyncAgent):
    """Executes a single todo with planning and reflection capabilities.

    This agent uses a Planner to:
    1. Convert a todo into a clear actionable instruction
    2. Execute the instruction using OAGI API
    3. Periodically reflect on progress and adjust approach
    4. Generate execution summaries
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        model: str = "lux-actor-1",
        max_steps: int = 20,
        reflection_interval: int = 4,
        temperature: float = 0.5,
        planner: Planner | None = None,
        external_memory: PlannerMemory | None = None,
        todo_index: int | None = None,
        step_observer: AsyncObserver | None = None,
    ):
        """Initialize the taskee agent.

        Args:
            api_key: OAGI API key
            base_url: OAGI API base URL
            model: Model to use for vision tasks
            max_steps: Maximum steps before reinitializing task
            reflection_interval: Number of actions before triggering reflection
            temperature: Sampling temperature
            planner: Planner for planning and reflection; when omitted a
                ``Planner`` is built from ``api_key`` and ``base_url``
            external_memory: External memory from parent agent
            todo_index: Index of the todo being executed
            step_observer: Optional observer for step tracking
        """
        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        self.max_steps = max_steps
        self.reflection_interval = reflection_interval
        self.temperature = temperature
        self.planner = planner or Planner(api_key=api_key, base_url=base_url)
        self.external_memory = external_memory
        self.todo_index = todo_index
        self.step_observer = step_observer

        # Internal state (reset at the start of every execute() call)
        self.actor: AsyncActor | None = None
        self.current_todo: str = ""
        self.current_instruction: str = ""
        self.actions: list[Action] = []
        self.total_actions = 0
        self.since_reflection = 0
        self.success = False

    async def execute(
        self,
        instruction: str,
        action_handler: AsyncActionHandler,
        image_provider: AsyncImageProvider,
    ) -> bool:
        """Execute the todo using planning and reflection.

        Any exception raised while planning, stepping or executing actions is
        logged, recorded as an ``"error"`` action and turned into a ``False``
        return value. The actor is always closed before returning.

        Args:
            instruction: The todo description to execute
            action_handler: Handler for executing actions
            image_provider: Provider for capturing screenshots

        Returns:
            True if successful, False otherwise
        """
        self.current_todo = instruction
        self.actions = []
        self.total_actions = 0
        self.since_reflection = 0
        self.success = False

        try:
            self.actor = AsyncActor(
                api_key=self.api_key,
                base_url=self.base_url,
                model=self.model,
                temperature=self.temperature,
            )
            # Initial planning
            await self._initial_plan(image_provider)

            # Initialize the actor with the task
            await self.actor.init_task(
                self.current_instruction, max_steps=self.max_steps
            )

            # Main execution loop with reinitializations
            remaining_steps = self.max_steps

            while remaining_steps > 0 and not self.success:
                # Execute subtask
                steps_taken = await self._execute_subtask(
                    min(self.max_steps, remaining_steps),
                    action_handler,
                    image_provider,
                )
                # A subtask that fails before finishing a single step reports
                # 0 steps. Charge at least one step so that a persistent
                # failure (e.g. screenshot upload) combined with a "continue"
                # reflection cannot loop forever.
                remaining_steps -= max(steps_taken, 1)

                # Check if we should continue
                if not self.success and remaining_steps > 0:
                    # Reflect and potentially get new instruction
                    should_continue = await self._reflect_and_decide(image_provider)
                    if not should_continue:
                        break

            # Generate final summary
            await self._generate_summary()

            return self.success

        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Error executing todo: {e}")
            self._record_action(
                action_type="error",
                target=None,
                reasoning=str(e),
            )
            return False
        finally:
            # Clean up actor; drop the reference first so it is cleared even
            # if close() itself raises.
            actor, self.actor = self.actor, None
            if actor:
                await actor.close()

    async def _initial_plan(self, image_provider: AsyncImageProvider) -> None:
        """Generate initial plan for the todo.

        Captures a screenshot, asks the planner for an instruction, records
        the plan in the action history, notifies the observer (if any) and
        stores the instruction in ``self.current_instruction``.

        Args:
            image_provider: Provider for capturing screenshots
        """
        logger.info("Generating initial plan for todo")

        # Capture initial screenshot
        screenshot = await image_provider()

        # Get context from external memory if available
        context = self._get_context()

        # Generate plan using LLM planner
        plan_output = await self.planner.initial_plan(
            self.current_todo,
            context,
            screenshot,
            memory=self.external_memory,
            todo_index=self.todo_index,
        )

        # Record planning action
        self._record_action(
            action_type="plan",
            target="initial",
            reasoning=plan_output.reasoning,
            result=plan_output.instruction,
        )

        # Emit plan event
        if self.step_observer:
            await self.step_observer.on_event(
                PlanEvent(
                    phase="initial",
                    image=_serialize_image(screenshot),
                    reasoning=plan_output.reasoning,
                    result=plan_output.instruction,
                )
            )

        # Set current instruction
        self.current_instruction = plan_output.instruction
        logger.info(f"Initial instruction: {self.current_instruction}")

        # Handle subtodos if any (currently only logged)
        if plan_output.subtodos:
            logger.info(f"Planner created {len(plan_output.subtodos)} subtodos")
            # Could potentially add these to memory for tracking

    async def _emit_action_event(
        self,
        step_num: int,
        actions: list[Any],
        error: str | None,
    ) -> None:
        """Send an ActionEvent to the step observer, if one is configured."""
        if self.step_observer:
            await self.step_observer.on_event(
                ActionEvent(
                    step_num=step_num,
                    actions=actions,
                    error=error,
                )
            )

    async def _execute_subtask(
        self,
        max_steps: int,
        action_handler: AsyncActionHandler,
        image_provider: AsyncImageProvider,
    ) -> int:
        """Execute a subtask with the current instruction.

        The loop stops early when the screenshot upload or the actor step
        fails (recorded as an ``"error"`` action), when the step signals
        ``stop``, or when ``reflection_interval`` actions have accumulated.

        Args:
            max_steps: Maximum steps for this subtask
            action_handler: Handler for executing actions
            image_provider: Provider for capturing screenshots

        Returns:
            Number of steps taken

        Raises:
            Exception: Whatever ``action_handler`` raises is propagated after
                the observer has been notified of the failure.
        """
        logger.info(f"Executing subtask with max {max_steps} steps")

        steps_taken = 0
        client = self.planner._ensure_client()

        for _ in range(max_steps):
            # Capture screenshot
            screenshot = await image_provider()

            # Upload screenshot first to get UUID (avoids re-upload in actor.step)
            try:
                upload_response = await client.put_s3_presigned_url(screenshot)
                screenshot_uuid = upload_response.uuid
                screenshot_url = upload_response.download_url
            except Exception as e:
                logger.error(f"Error uploading screenshot: {e}")
                self._record_action(
                    action_type="error",
                    target="screenshot_upload",
                    reasoning=str(e),
                )
                break

            # Get next step from OAGI using URL (avoids re-upload)
            try:
                step = await self.actor.step(URL(screenshot_url), instruction=None)
            except Exception as e:
                logger.error(f"Error getting step from OAGI: {e}")
                self._record_action(
                    action_type="error",
                    target="oagi_step",
                    reasoning=str(e),
                    screenshot_uuid=screenshot_uuid,
                )
                break

            # Log reasoning
            if step.reason:
                logger.info(f"Step {self.total_actions + 1}: {step.reason}")

            # Emit step event
            if self.step_observer:
                await self.step_observer.on_event(
                    StepEvent(
                        step_num=self.total_actions + 1,
                        image=_serialize_image(screenshot),
                        step=step,
                    )
                )

            # Record OAGI actions
            if step.actions:
                # Log actions with details
                logger.info(f"Actions ({len(step.actions)}):")
                for action in step.actions:
                    count_suffix = (
                        f" x{action.count}" if action.count and action.count > 1 else ""
                    )
                    logger.info(
                        f"  [{action.type.value}] {action.argument}{count_suffix}"
                    )

                for action in step.actions:
                    self._record_action(
                        # Use .value here as well, matching the log line above.
                        action_type=action.type.value.lower(),
                        target=action.argument,
                        reasoning=step.reason,
                        screenshot_uuid=screenshot_uuid,
                    )

                # Execute actions. On failure the observer is told about the
                # error before the exception propagates to execute().
                event_step_num = self.total_actions + 1
                try:
                    await action_handler(step.actions)
                except Exception as e:
                    await self._emit_action_event(
                        event_step_num, step.actions, str(e)
                    )
                    raise

                # Emit action event for the successful execution
                await self._emit_action_event(event_step_num, step.actions, None)

                self.total_actions += len(step.actions)
                self.since_reflection += len(step.actions)

            steps_taken += 1

            # Check if task is complete
            if step.stop:
                logger.info("OAGI signaled task completion")
                break

            # Check if reflection is needed
            if self.since_reflection >= self.reflection_interval:
                logger.info("Reflection interval reached")
                break

        return steps_taken

    async def _reflect_and_decide(self, image_provider: AsyncImageProvider) -> bool:
        """Reflect on progress and decide whether to continue.

        Sets ``self.success`` when the planner assesses the todo as done, and
        re-initialises the actor task when the planner supplies a new
        instruction.

        Args:
            image_provider: Provider for capturing screenshots

        Returns:
            True to continue, False to stop
        """
        logger.info("Reflecting on progress")

        # Capture current screenshot
        screenshot = await image_provider()

        # Get context
        context = self._get_context()
        context["current_todo"] = self.current_todo

        # Get recent actions for reflection. Guard the zero case explicitly:
        # actions[-0:] would return the entire history instead of nothing.
        if self.since_reflection > 0:
            recent_actions = self.actions[-self.since_reflection :]
        else:
            recent_actions = []

        # Reflect using planner
        reflection = await self.planner.reflect(
            recent_actions,
            context,
            screenshot,
            memory=self.external_memory,
            todo_index=self.todo_index,
            current_instruction=self.current_instruction,
            reflection_interval=self.reflection_interval,
        )

        # Record reflection
        self._record_action(
            action_type="reflect",
            target=None,
            reasoning=reflection.reasoning,
            result=("continue" if reflection.continue_current else "pivot"),
        )

        # Emit plan event for reflection
        if self.step_observer:
            decision = (
                "success"
                if reflection.success_assessment
                else ("continue" if reflection.continue_current else "pivot")
            )
            await self.step_observer.on_event(
                PlanEvent(
                    phase="reflection",
                    image=_serialize_image(screenshot),
                    reasoning=reflection.reasoning,
                    result=decision,
                )
            )

        # Update success assessment
        if reflection.success_assessment:
            self.success = True
            logger.info("Reflection indicates task is successful")
            return False

        # Reset reflection counter
        self.since_reflection = 0

        # Update instruction if needed
        if not reflection.continue_current and reflection.new_instruction:
            logger.info(f"Pivoting to new instruction: {reflection.new_instruction}")
            self.current_instruction = reflection.new_instruction

            # Re-initialise the actor's task with the new instruction
            await self.actor.init_task(
                self.current_instruction, max_steps=self.max_steps
            )
            return True

        return reflection.continue_current

    async def _generate_summary(self) -> None:
        """Generate execution summary.

        The summary text is recorded as a ``"summary"`` action (in its
        ``reasoning`` field) and sent to the observer as a ``PlanEvent``.
        """
        logger.info("Generating execution summary")

        context = self._get_context()
        context["current_todo"] = self.current_todo

        summary = await self.planner.summarize(
            self.actions,
            context,
            memory=self.external_memory,
            todo_index=self.todo_index,
        )

        # Record summary
        self._record_action(
            action_type="summary",
            target=None,
            reasoning=summary,
        )

        # Emit plan event for summary
        if self.step_observer:
            await self.step_observer.on_event(
                PlanEvent(
                    phase="summary",
                    image=None,
                    reasoning=summary,
                    result=None,
                )
            )

        logger.info(f"Execution summary: {summary}")

    def _record_action(
        self,
        action_type: str,
        target: str | None,
        reasoning: str | None = None,
        result: str | None = None,
        screenshot_uuid: str | None = None,
    ) -> None:
        """Record an action to the history.

        Args:
            action_type: Type of action
            target: Target of the action
            reasoning: Reasoning for the action
            result: Result of the action
            screenshot_uuid: UUID of uploaded screenshot for this action
        """
        action = Action(
            # Local-time ISO-8601 timestamp taken when the record is created.
            timestamp=datetime.now().isoformat(),
            action_type=action_type,
            target=target,
            reasoning=reasoning,
            result=result,
            details={},
            screenshot_uuid=screenshot_uuid,
        )
        self.actions.append(action)

    def _get_context(self) -> dict[str, Any]:
        """Get execution context.

        Returns:
            The external memory's context when external memory is set,
            otherwise an empty dictionary
        """
        if self.external_memory:
            return self.external_memory.get_context()
        return {}

    def return_execution_results(self) -> ExecutionResult:
        """Return the execution results.

        Returns:
            ExecutionResult with success status, actions, and summary.
            ``total_steps`` is filled from the executed-action counter
            (``self.total_actions``).
        """
        # Find the most recent summary in actions
        summary = ""
        for action in reversed(self.actions):
            if action.action_type == "summary":
                summary = action.reasoning or ""
                break

        return ExecutionResult(
            success=self.success,
            actions=self.actions,
            summary=summary,
            total_steps=self.total_actions,
        )