oagi-core 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. oagi/__init__.py +108 -0
  2. oagi/agent/__init__.py +31 -0
  3. oagi/agent/default.py +75 -0
  4. oagi/agent/factories.py +50 -0
  5. oagi/agent/protocol.py +55 -0
  6. oagi/agent/registry.py +155 -0
  7. oagi/agent/tasker/__init__.py +35 -0
  8. oagi/agent/tasker/memory.py +184 -0
  9. oagi/agent/tasker/models.py +83 -0
  10. oagi/agent/tasker/planner.py +385 -0
  11. oagi/agent/tasker/taskee_agent.py +395 -0
  12. oagi/agent/tasker/tasker_agent.py +323 -0
  13. oagi/async_pyautogui_action_handler.py +44 -0
  14. oagi/async_screenshot_maker.py +47 -0
  15. oagi/async_single_step.py +85 -0
  16. oagi/cli/__init__.py +11 -0
  17. oagi/cli/agent.py +125 -0
  18. oagi/cli/main.py +77 -0
  19. oagi/cli/server.py +94 -0
  20. oagi/cli/utils.py +82 -0
  21. oagi/client/__init__.py +12 -0
  22. oagi/client/async_.py +293 -0
  23. oagi/client/base.py +465 -0
  24. oagi/client/sync.py +296 -0
  25. oagi/exceptions.py +118 -0
  26. oagi/logging.py +47 -0
  27. oagi/pil_image.py +102 -0
  28. oagi/pyautogui_action_handler.py +268 -0
  29. oagi/screenshot_maker.py +41 -0
  30. oagi/server/__init__.py +13 -0
  31. oagi/server/agent_wrappers.py +98 -0
  32. oagi/server/config.py +46 -0
  33. oagi/server/main.py +157 -0
  34. oagi/server/models.py +98 -0
  35. oagi/server/session_store.py +116 -0
  36. oagi/server/socketio_server.py +405 -0
  37. oagi/single_step.py +87 -0
  38. oagi/task/__init__.py +14 -0
  39. oagi/task/async_.py +97 -0
  40. oagi/task/async_short.py +64 -0
  41. oagi/task/base.py +121 -0
  42. oagi/task/short.py +64 -0
  43. oagi/task/sync.py +97 -0
  44. oagi/types/__init__.py +28 -0
  45. oagi/types/action_handler.py +30 -0
  46. oagi/types/async_action_handler.py +30 -0
  47. oagi/types/async_image_provider.py +37 -0
  48. oagi/types/image.py +17 -0
  49. oagi/types/image_provider.py +34 -0
  50. oagi/types/models/__init__.py +32 -0
  51. oagi/types/models/action.py +33 -0
  52. oagi/types/models/client.py +64 -0
  53. oagi/types/models/image_config.py +47 -0
  54. oagi/types/models/step.py +17 -0
  55. oagi/types/url_image.py +47 -0
  56. oagi_core-0.9.0.dist-info/METADATA +257 -0
  57. oagi_core-0.9.0.dist-info/RECORD +60 -0
  58. oagi_core-0.9.0.dist-info/WHEEL +4 -0
  59. oagi_core-0.9.0.dist-info/entry_points.txt +2 -0
  60. oagi_core-0.9.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,83 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from enum import Enum
10
+ from typing import Any
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+
15
class TodoStatus(str, Enum):
    """Status of a todo item in the workflow.

    Subclasses ``str`` so values serialize as plain strings in JSON/pydantic dumps.
    """

    PENDING = "pending"          # not yet started
    IN_PROGRESS = "in_progress"  # currently being executed
    COMPLETED = "completed"      # finished successfully
    SKIPPED = "skipped"          # deliberately not executed
22
+
23
+
24
class Todo(BaseModel):
    """A single todo item in the workflow."""

    # Human-readable description of the work to be done.
    description: str
    # Lifecycle state; new todos start as PENDING.
    status: TodoStatus = TodoStatus.PENDING
29
+
30
+
31
class Deliverable(BaseModel):
    """A deliverable or goal to be achieved."""

    # What must be produced or accomplished.
    description: str
    # Flipped to True once the deliverable has been met.
    achieved: bool = False
36
+
37
+
38
class Action(BaseModel):
    """An action taken during execution."""

    # When the action occurred (string form; format set by the caller — TODO confirm ISO-8601).
    timestamp: str
    action_type: str  # "plan", "reflect", "click", "type", "scroll", etc.
    # What the action was applied to (e.g. a UI element), if any.
    target: str | None = None
    # Arbitrary structured payload for the action; defaults to an empty dict.
    details: dict[str, Any] = Field(default_factory=dict)
    # Why the action was taken, if recorded.
    reasoning: str | None = None
    # Outcome of the action, if recorded.
    result: str | None = None
47
+
48
+
49
class TodoHistory(BaseModel):
    """Execution history for a specific todo."""

    # Index of the todo within the overall plan.
    todo_index: int
    # Description of the todo this history belongs to.
    todo: str
    # Ordered list of actions performed while executing the todo.
    actions: list[Action]
    # Optional post-hoc summary of what happened.
    summary: str | None = None
    # Whether the todo was finished.
    completed: bool = False
57
+
58
+
59
class PlannerOutput(BaseModel):
    """Output from the LLM planner's initial planning."""

    instruction: str  # Clear instruction for the todo
    reasoning: str  # Planner's reasoning
    subtodos: list[str] = Field(default_factory=list)  # Optional subtasks
65
+
66
+
67
class ReflectionOutput(BaseModel):
    """Output from the LLM planner's reflection."""

    continue_current: bool  # Whether to continue with current approach
    new_instruction: str | None = None  # New instruction if pivoting
    reasoning: str  # Reflection reasoning
    success_assessment: bool = False  # Whether the task appears successful
74
+
75
+
76
class ExecutionResult(BaseModel):
    """Result from executing a single todo."""

    # Whether the todo execution succeeded overall.
    success: bool
    # All actions taken during the execution.
    actions: list[Action]
    # Narrative summary of the execution.
    summary: str
    # Error message when execution failed, else None.
    error: str | None = None
    # Number of steps performed (0 if none were counted).
    total_steps: int = 0
@@ -0,0 +1,385 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import json
10
+ from typing import Any
11
+
12
+ from ...client import AsyncClient
13
+ from .memory import PlannerMemory
14
+ from .models import Action, PlannerOutput, ReflectionOutput
15
+
16
+
17
class Planner:
    """Planner for task decomposition and reflection.

    This class provides planning and reflection capabilities using OAGI workers.
    It lazily creates an AsyncClient when one is not supplied and closes it on
    exit only if it created it.
    """

    def __init__(self, client: AsyncClient | None = None):
        """Initialize the planner.

        Args:
            client: AsyncClient for OAGI API calls. If None, one will be created when needed.
        """
        self.client = client
        self._owns_client = False  # Track if we created the client, so close() only closes ours

    def _ensure_client(self) -> AsyncClient:
        """Ensure we have a client, creating one if needed."""
        if not self.client:
            self.client = AsyncClient()
            self._owns_client = True
        return self.client

    async def close(self):
        """Close the client if we own it (caller-supplied clients are left open)."""
        if self._owns_client and self.client:
            await self.client.close()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    def _extract_memory_data(
        self,
        memory: PlannerMemory | None,
        context: dict[str, Any],
        todo_index: int | None = None,
    ) -> tuple[str, list, list, list, str | None, str]:
        """Extract memory data for API calls.

        Args:
            memory: Optional PlannerMemory instance
            context: Fallback context dictionary, used when memory/todo_index are absent
            todo_index: Optional todo index for extracting overall_todo

        Returns:
            Tuple of (task_description, todos, deliverables, history,
            task_execution_summary, overall_todo)
        """
        if memory and todo_index is not None:
            # Use memory data
            task_description = memory.task_description
            todos = [
                {
                    "index": i,
                    "description": t.description,
                    "status": t.status.value,
                    "execution_summary": memory.todo_execution_summaries.get(i),
                }
                for i, t in enumerate(memory.todos)
            ]
            deliverables = [d.model_dump() for d in memory.deliverables]
            history = [
                {
                    "todo_index": h.todo_index,
                    "todo_description": h.todo,
                    "action_count": len(h.actions),
                    "summary": h.summary,
                    "completed": h.completed,
                }
                for h in memory.history
            ]
            # Normalize empty-string summaries to None for the API payload.
            task_execution_summary = memory.task_execution_summary or None
            # Bounds-check todo_index: an out-of-range index previously raised
            # IndexError here; fall back to "" instead.
            if 0 <= todo_index < len(memory.todos):
                overall_todo = memory.todos[todo_index].description
            else:
                overall_todo = ""
        else:
            # Fallback to basic context
            task_description = context.get("task_description", "")
            todos = context.get("todos", [])
            deliverables = context.get("deliverables", [])
            history = context.get("history", [])
            task_execution_summary = None
            overall_todo = context.get("current_todo", "")

        return (
            task_description,
            todos,
            deliverables,
            history,
            task_execution_summary,
            overall_todo,
        )

    async def initial_plan(
        self,
        todo: str,
        context: dict[str, Any],
        screenshot: bytes | None = None,
        memory: PlannerMemory | None = None,
        todo_index: int | None = None,
    ) -> PlannerOutput:
        """Generate initial plan for a todo.

        Args:
            todo: The todo description to plan for
            context: Full context including task, todos, deliverables, and history
            screenshot: Optional screenshot for visual context
            memory: Optional PlannerMemory for formatting contexts
            todo_index: Optional todo index for formatting internal context

        Returns:
            PlannerOutput with instruction, reasoning, and optional subtodos
        """
        # Ensure we have a client
        client = self._ensure_client()

        # Upload screenshot if provided
        screenshot_uuid = None
        if screenshot:
            upload_response = await client.put_s3_presigned_url(screenshot)
            screenshot_uuid = upload_response.uuid

        # Extract memory data if provided
        (
            task_description,
            todos,
            deliverables,
            history,
            task_execution_summary,
            _,  # overall_todo not needed here, we use the `todo` parameter
        ) = self._extract_memory_data(memory, context, todo_index)

        # Call OAGI worker
        response = await client.call_worker(
            worker_id="oagi_first",
            overall_todo=todo,
            task_description=task_description,
            todos=todos,
            deliverables=deliverables,
            history=history,
            current_todo_index=todo_index,
            task_execution_summary=task_execution_summary,
            current_screenshot=screenshot_uuid,
        )

        # Parse response
        return self._parse_planner_output(response.response)

    async def reflect(
        self,
        actions: list[Action],
        context: dict[str, Any],
        screenshot: bytes | None = None,
        memory: PlannerMemory | None = None,
        todo_index: int | None = None,
        current_instruction: str | None = None,
    ) -> ReflectionOutput:
        """Reflect on recent actions and progress.

        Args:
            actions: Recent actions to reflect on (only the last 10 are sent)
            context: Full context including task, todos, deliverables, and history
            screenshot: Optional current screenshot
            memory: Optional PlannerMemory for formatting contexts
            todo_index: Optional todo index for formatting internal context
            current_instruction: Current subtask instruction being executed

        Returns:
            ReflectionOutput with continuation decision and reasoning
        """
        # Ensure we have a client
        client = self._ensure_client()

        # Upload screenshot if provided
        result_screenshot_uuid = None
        if screenshot:
            upload_response = await client.put_s3_presigned_url(screenshot)
            result_screenshot_uuid = upload_response.uuid

        # Extract memory data if provided
        (
            task_description,
            todos,
            deliverables,
            history,
            task_execution_summary,
            overall_todo,
        ) = self._extract_memory_data(memory, context, todo_index)

        # Convert actions to window_steps format
        window_steps = [
            {
                "step_number": i + 1,
                "action_type": action.action_type,
                "target": action.target or "",
                "reasoning": action.reasoning or "",
            }
            for i, action in enumerate(actions[-10:])  # Last 10 actions
        ]

        # Format prior notes from context (still needed as a simple string summary)
        prior_notes = self._format_execution_notes(context)

        # Call OAGI worker
        response = await client.call_worker(
            worker_id="oagi_follow",
            overall_todo=overall_todo,
            task_description=task_description,
            todos=todos,
            deliverables=deliverables,
            history=history,
            current_todo_index=todo_index,
            task_execution_summary=task_execution_summary,
            current_subtask_instruction=current_instruction or "",
            window_steps=window_steps,
            window_screenshots=[],  # Could be populated if we track screenshot history
            result_screenshot=result_screenshot_uuid,
            prior_notes=prior_notes,
        )

        # Parse response
        return self._parse_reflection_output(response.response)

    async def summarize(
        self,
        execution_history: list[Action],
        context: dict[str, Any],
        memory: PlannerMemory | None = None,
        todo_index: int | None = None,
    ) -> str:
        """Generate execution summary.

        Args:
            execution_history: Complete execution history
            context: Full context including task, todos, deliverables
            memory: Optional PlannerMemory for formatting contexts
            todo_index: Optional todo index for formatting internal context

        Returns:
            String summary of the execution
        """
        # Ensure we have a client
        client = self._ensure_client()

        # Extract memory data if provided
        (
            task_description,
            todos,
            deliverables,
            history,
            task_execution_summary,
            overall_todo,
        ) = self._extract_memory_data(memory, context, todo_index)

        # Extract latest_todo_summary (specific to summarize method)
        if memory and todo_index is not None:
            latest_todo_summary = memory.todo_execution_summaries.get(todo_index, "")
        else:
            latest_todo_summary = ""

        # Call OAGI worker
        response = await client.call_worker(
            worker_id="oagi_task_summary",
            overall_todo=overall_todo,
            task_description=task_description,
            todos=todos,
            deliverables=deliverables,
            history=history,
            current_todo_index=todo_index,
            task_execution_summary=task_execution_summary,
            latest_todo_summary=latest_todo_summary,
        )

        # Parse response and extract summary. json.loads can succeed on
        # non-object JSON (list, string, number); only dicts have .get, so
        # guard the type instead of letting AttributeError escape.
        try:
            result = json.loads(response.response)
        except json.JSONDecodeError:
            return response.response
        if isinstance(result, dict):
            return result.get("task_summary", response.response)
        return response.response

    def _format_execution_notes(self, context: dict[str, Any]) -> str:
        """Format execution history notes.

        Args:
            context: Context dictionary

        Returns:
            Formatted execution notes
        """
        if not context.get("history"):
            return ""

        parts = []
        for hist in context["history"]:
            parts.append(
                f"Todo {hist['todo_index']}: {hist['action_count']} actions, "
                f"completed: {hist['completed']}"
            )
            if hist.get("summary"):
                parts.append(f"Summary: {hist['summary']}")

        return "\n".join(parts)

    def _parse_planner_output(self, response: str) -> PlannerOutput:
        """Parse OAGI worker response into structured planner output.

        Args:
            response: Raw string response from OAGI worker (oagi_first)

        Returns:
            Structured PlannerOutput
        """
        try:
            # Try to parse as JSON (oagi_first format)
            data = json.loads(response)
            # Valid JSON that is not an object (e.g. a bare list or string)
            # has no .get and previously raised an uncaught AttributeError;
            # route it to the fallback instead.
            if not isinstance(data, dict):
                raise ValueError("response JSON is not an object")
            # oagi_first returns: {"reasoning": "...", "subtask": "..."}
            # `or ""` guards explicit nulls in the JSON payload.
            return PlannerOutput(
                instruction=data.get("subtask", data.get("instruction", "")) or "",
                reasoning=data.get("reasoning", "") or "",
                subtodos=data.get("subtodos") or [],  # Not typically returned by oagi_first
            )
        except (json.JSONDecodeError, KeyError, ValueError):
            # Fallback: use the entire response as instruction
            return PlannerOutput(
                instruction=response,
                reasoning="Failed to parse structured response",
                subtodos=[],
            )

    def _parse_reflection_output(self, response: str) -> ReflectionOutput:
        """Parse reflection response into structured output.

        Args:
            response: Raw string response from OAGI worker (oagi_follow)

        Returns:
            Structured ReflectionOutput
        """
        try:
            # Try to parse as JSON (oagi_follow format)
            data = json.loads(response)
            # Same non-object guard as _parse_planner_output.
            if not isinstance(data, dict):
                raise ValueError("response JSON is not an object")
            # oagi_follow returns:
            # {"assessment": "...", "summary": "...", "reflection": "...",
            #  "success": "yes" | "no", "subtask_instruction": "..."}

            # Determine if we should continue or pivot
            success = data.get("success", "no") == "yes"
            # `or ""` guards a present-but-null subtask_instruction, which
            # previously raised AttributeError on .strip().
            new_subtask = (data.get("subtask_instruction") or "").strip()

            # Continue current if success is not achieved and no new subtask provided
            # Pivot if a new subtask instruction is provided
            continue_current = not success and not new_subtask

            return ReflectionOutput(
                continue_current=continue_current,
                new_instruction=new_subtask if new_subtask else None,
                reasoning=data.get("reflection", data.get("reasoning", "")) or "",
                success_assessment=success,
            )
        except (json.JSONDecodeError, KeyError, ValueError):
            # Fallback: continue with current approach
            return ReflectionOutput(
                continue_current=True,
                new_instruction=None,
                reasoning="Failed to parse reflection response, continuing current approach",
                success_assessment=False,
            )