oagi-core 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +148 -0
- oagi/agent/__init__.py +33 -0
- oagi/agent/default.py +124 -0
- oagi/agent/factories.py +74 -0
- oagi/agent/observer/__init__.py +38 -0
- oagi/agent/observer/agent_observer.py +99 -0
- oagi/agent/observer/events.py +28 -0
- oagi/agent/observer/exporters.py +445 -0
- oagi/agent/observer/protocol.py +12 -0
- oagi/agent/protocol.py +55 -0
- oagi/agent/registry.py +155 -0
- oagi/agent/tasker/__init__.py +33 -0
- oagi/agent/tasker/memory.py +160 -0
- oagi/agent/tasker/models.py +77 -0
- oagi/agent/tasker/planner.py +408 -0
- oagi/agent/tasker/taskee_agent.py +512 -0
- oagi/agent/tasker/tasker_agent.py +324 -0
- oagi/cli/__init__.py +11 -0
- oagi/cli/agent.py +281 -0
- oagi/cli/display.py +56 -0
- oagi/cli/main.py +77 -0
- oagi/cli/server.py +94 -0
- oagi/cli/tracking.py +55 -0
- oagi/cli/utils.py +89 -0
- oagi/client/__init__.py +12 -0
- oagi/client/async_.py +290 -0
- oagi/client/base.py +457 -0
- oagi/client/sync.py +293 -0
- oagi/exceptions.py +118 -0
- oagi/handler/__init__.py +24 -0
- oagi/handler/_macos.py +55 -0
- oagi/handler/async_pyautogui_action_handler.py +44 -0
- oagi/handler/async_screenshot_maker.py +47 -0
- oagi/handler/pil_image.py +102 -0
- oagi/handler/pyautogui_action_handler.py +291 -0
- oagi/handler/screenshot_maker.py +41 -0
- oagi/logging.py +55 -0
- oagi/server/__init__.py +13 -0
- oagi/server/agent_wrappers.py +98 -0
- oagi/server/config.py +46 -0
- oagi/server/main.py +157 -0
- oagi/server/models.py +98 -0
- oagi/server/session_store.py +116 -0
- oagi/server/socketio_server.py +405 -0
- oagi/task/__init__.py +21 -0
- oagi/task/async_.py +101 -0
- oagi/task/async_short.py +76 -0
- oagi/task/base.py +157 -0
- oagi/task/short.py +76 -0
- oagi/task/sync.py +99 -0
- oagi/types/__init__.py +50 -0
- oagi/types/action_handler.py +30 -0
- oagi/types/async_action_handler.py +30 -0
- oagi/types/async_image_provider.py +38 -0
- oagi/types/image.py +17 -0
- oagi/types/image_provider.py +35 -0
- oagi/types/models/__init__.py +32 -0
- oagi/types/models/action.py +33 -0
- oagi/types/models/client.py +68 -0
- oagi/types/models/image_config.py +47 -0
- oagi/types/models/step.py +17 -0
- oagi/types/step_observer.py +93 -0
- oagi/types/url.py +3 -0
- oagi_core-0.10.1.dist-info/METADATA +245 -0
- oagi_core-0.10.1.dist-info/RECORD +68 -0
- oagi_core-0.10.1.dist-info/WHEEL +4 -0
- oagi_core-0.10.1.dist-info/entry_points.txt +2 -0
- oagi_core-0.10.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .models import Action, Todo, TodoHistory, TodoStatus
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PlannerMemory:
    """In-memory execution state for the planner agent.

    Holds the hierarchical task state used by TaskerAgent: the overall
    task description, its todo list, per-todo execution history, and
    running summaries.  Context formatting for backend API calls is
    handled by the backend; this class only stores and queries state.
    """

    def __init__(self):
        """Start with a blank task: no todos, history, or summaries."""
        self.task_description: str = ""
        self.todos: list[Todo] = []
        self.history: list[TodoHistory] = []
        self.task_execution_summary: str = ""
        self.todo_execution_summaries: dict[int, str] = {}

    def set_task(
        self,
        task_description: str,
        todos: list[str] | list[Todo],
    ) -> None:
        """Record the task and normalize its todos.

        Args:
            task_description: Overall task description
            todos: List of todo items (strings or Todo objects)
        """
        self.task_description = task_description
        # Plain strings become fresh (pending) Todos; Todo objects pass through.
        self.todos = [
            Todo(description=item) if isinstance(item, str) else item
            for item in todos
        ]

    def get_current_todo(self) -> tuple[Todo | None, int]:
        """Find the first todo that still needs work.

        Returns:
            Tuple of (Todo object, index) or (None, -1) if no todos remain
        """
        active = (TodoStatus.PENDING, TodoStatus.IN_PROGRESS)
        return next(
            (
                (item, position)
                for position, item in enumerate(self.todos)
                if item.status in active
            ),
            (None, -1),
        )

    def update_todo(
        self,
        index: int,
        status: TodoStatus | str,
        summary: str | None = None,
    ) -> None:
        """Update a todo's status and optionally its summary.

        Out-of-range indices are silently ignored.

        Args:
            index: Index of the todo to update
            status: New status for the todo (string values are coerced)
            summary: Optional execution summary
        """
        if not 0 <= index < len(self.todos):
            return
        self.todos[index].status = (
            TodoStatus(status) if isinstance(status, str) else status
        )
        if summary:
            self.todo_execution_summaries[index] = summary

    def add_history(
        self,
        todo_index: int,
        actions: list[Action],
        summary: str | None = None,
        completed: bool = False,
    ) -> None:
        """Record the execution history of one todo.

        Out-of-range indices are silently ignored.

        Args:
            todo_index: Index of the todo
            actions: List of actions taken
            summary: Optional execution summary
            completed: Whether the todo was completed
        """
        if not 0 <= todo_index < len(self.todos):
            return
        entry = TodoHistory(
            todo_index=todo_index,
            todo=self.todos[todo_index].description,
            actions=actions,
            summary=summary,
            completed=completed,
        )
        self.history.append(entry)

    def get_context(self) -> dict[str, Any]:
        """Snapshot all memory state for planning/reflection.

        Returns:
            Dictionary containing all memory state
        """
        todo_entries = [
            {"index": position, "description": item.description, "status": item.status}
            for position, item in enumerate(self.todos)
        ]
        history_entries = [
            {
                "todo_index": record.todo_index,
                "todo": record.todo,
                "action_count": len(record.actions),
                "summary": record.summary,
                "completed": record.completed,
            }
            for record in self.history
        ]
        return {
            "task_description": self.task_description,
            "todos": todo_entries,
            "history": history_entries,
            "task_execution_summary": self.task_execution_summary,
            "todo_execution_summaries": self.todo_execution_summaries,
        }

    def get_todo_status_summary(self) -> dict[str, int]:
        """Count todos per status.

        Returns:
            Dictionary with counts for each status (all statuses present,
            including zero counts)
        """
        counts = dict.fromkeys(
            (
                TodoStatus.PENDING,
                TodoStatus.IN_PROGRESS,
                TodoStatus.COMPLETED,
                TodoStatus.SKIPPED,
            ),
            0,
        )
        for item in self.todos:
            counts[item.status] += 1
        return counts

    def append_todo(self, description: str) -> None:
        """Append a new (pending) todo to the list.

        Args:
            description: Description of the new todo
        """
        self.todos.append(Todo(description=description))
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TodoStatus(str, Enum):
    """Status of a todo item in the workflow.

    Subclasses str, so members compare equal to their string values and
    serialize naturally in JSON and pydantic models.
    """

    PENDING = "pending"  # not started yet
    IN_PROGRESS = "in_progress"  # currently being worked on
    COMPLETED = "completed"  # finished
    SKIPPED = "skipped"  # deliberately not executed
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Todo(BaseModel):
    """A single todo item in the workflow."""

    # Human-readable description of what this todo should accomplish.
    description: str
    # Lifecycle state; newly created todos default to PENDING.
    status: TodoStatus = TodoStatus.PENDING
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Action(BaseModel):
    """An action taken during execution."""

    # When the action occurred. NOTE(review): stored as a free-form string;
    # the exact format is not enforced here — confirm against producers.
    timestamp: str
    action_type: str  # "plan", "reflect", "click", "type", "scroll", etc.
    # Optional target the action operated on.
    target: str | None = None
    # Free-form structured details about the action.
    details: dict[str, Any] = Field(default_factory=dict)
    # Reasoning that led to this action, when available.
    reasoning: str | None = None
    # Outcome of the action, when available.
    result: str | None = None
    screenshot_uuid: str | None = None  # UUID of uploaded screenshot for this action
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TodoHistory(BaseModel):
    """Execution history for a specific todo."""

    # Index of the todo this history belongs to (position in the todo list).
    todo_index: int
    # Description text of the todo at the time history was recorded.
    todo: str
    # Actions taken while executing this todo.
    actions: list[Action]
    # Optional summary of the execution.
    summary: str | None = None
    # Whether the todo finished successfully.
    completed: bool = False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class PlannerOutput(BaseModel):
    """Output from the LLM planner's initial planning."""

    instruction: str  # Clear instruction for the todo
    reasoning: str  # Planner's reasoning
    subtodos: list[str] = Field(default_factory=list)  # Optional subtasks
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ReflectionOutput(BaseModel):
    """Output from the LLM planner's reflection."""

    continue_current: bool  # Whether to continue with current approach
    new_instruction: str | None = None  # New instruction if pivoting
    reasoning: str  # Reflection reasoning
    success_assessment: bool = False  # Whether the task appears successful
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ExecutionResult(BaseModel):
    """Result from executing a single todo."""

    # Whether the todo execution succeeded.
    success: bool
    # Actions performed during the execution.
    actions: list[Action]
    # Human-readable summary of what happened.
    summary: str
    # Error description when execution failed.
    error: str | None = None
    # Number of steps taken during execution.
    total_steps: int = 0
|
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ...client import AsyncClient
|
|
13
|
+
from ...types import URL, Image
|
|
14
|
+
from .memory import PlannerMemory
|
|
15
|
+
from .models import Action, PlannerOutput, ReflectionOutput
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Planner:
    """Planner for task decomposition and reflection.

    Delegates planning, reflection, and summarization to OAGI workers
    ("oagi_first", "oagi_follow", "oagi_task_summary") via an AsyncClient.
    Usable as an async context manager; any internally created client is
    closed on exit.
    """

    def __init__(
        self,
        client: AsyncClient | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
    ):
        """Initialize the planner.

        Args:
            client: AsyncClient for OAGI API calls. If None, one will be
                created lazily when needed (and owned/closed by this planner).
            api_key: API key for creating internal client
            base_url: Base URL for creating internal client
        """
        self.client = client
        self.api_key = api_key
        self.base_url = base_url
        self._owns_client = False  # True only if we created the client ourselves

    def _ensure_client(self) -> AsyncClient:
        """Ensure we have a client, creating (and taking ownership of) one if needed."""
        if not self.client:
            self.client = AsyncClient(api_key=self.api_key, base_url=self.base_url)
            self._owns_client = True
        return self.client

    async def close(self):
        """Close the client if we own it; externally supplied clients are left open."""
        if self._owns_client and self.client:
            await self.client.close()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    def _extract_memory_data(
        self,
        memory: PlannerMemory | None,
        context: dict[str, Any],
        todo_index: int | None = None,
    ) -> tuple[str, list, list, str | None, str]:
        """Extract memory data for API calls.

        Args:
            memory: Optional PlannerMemory instance
            context: Fallback context dictionary (used when memory or
                todo_index is not supplied)
            todo_index: Optional todo index for extracting overall_todo

        Returns:
            Tuple of (task_description, todos, history,
            task_execution_summary, overall_todo)
        """
        if memory and todo_index is not None:
            # Use memory data
            task_description = memory.task_description
            todos = [
                {
                    "index": i,
                    "description": t.description,
                    "status": t.status.value,
                    "execution_summary": memory.todo_execution_summaries.get(i),
                }
                for i, t in enumerate(memory.todos)
            ]
            history = [
                {
                    "todo_index": h.todo_index,
                    "todo_description": h.todo,
                    "action_count": len(h.actions),
                    "summary": h.summary,
                    "completed": h.completed,
                }
                for h in memory.history
            ]
            task_execution_summary = memory.task_execution_summary or None
            # Bounds-check the index rather than just testing list truthiness:
            # a stale index would raise IndexError, and -1 (the "no current
            # todo" sentinel) would silently pick the last todo.
            if 0 <= todo_index < len(memory.todos):
                overall_todo = memory.todos[todo_index].description
            else:
                overall_todo = ""
        else:
            # Fallback to basic context
            task_description = context.get("task_description", "")
            todos = context.get("todos", [])
            history = context.get("history", [])
            task_execution_summary = None
            overall_todo = context.get("current_todo", "")

        return (
            task_description,
            todos,
            history,
            task_execution_summary,
            overall_todo,
        )

    async def initial_plan(
        self,
        todo: str,
        context: dict[str, Any],
        screenshot: Image | URL | None = None,
        memory: PlannerMemory | None = None,
        todo_index: int | None = None,
    ) -> PlannerOutput:
        """Generate initial plan for a todo.

        Args:
            todo: The todo description to plan for
            context: Full context including task, todos, deliverables, and history
            screenshot: Optional screenshot for visual context
            memory: Optional PlannerMemory for formatting contexts
            todo_index: Optional todo index for formatting internal context

        Returns:
            PlannerOutput with instruction, reasoning, and optional subtodos
        """
        # Ensure we have a client
        client = self._ensure_client()

        # Upload screenshot if provided
        screenshot_uuid = None
        if screenshot:
            upload_response = await client.put_s3_presigned_url(screenshot)
            screenshot_uuid = upload_response.uuid

        # Extract memory data if provided
        (
            task_description,
            todos,
            history,
            task_execution_summary,
            _,  # overall_todo not needed here, we use the `todo` parameter
        ) = self._extract_memory_data(memory, context, todo_index)

        # Call OAGI worker
        response = await client.call_worker(
            worker_id="oagi_first",
            overall_todo=todo,
            task_description=task_description,
            todos=todos,
            history=history,
            current_todo_index=todo_index,
            task_execution_summary=task_execution_summary,
            current_screenshot=screenshot_uuid,
        )

        # Parse response
        return self._parse_planner_output(response.response)

    async def reflect(
        self,
        actions: list[Action],
        context: dict[str, Any],
        screenshot: Image | URL | None = None,
        memory: PlannerMemory | None = None,
        todo_index: int | None = None,
        current_instruction: str | None = None,
        reflection_interval: int = 4,
    ) -> ReflectionOutput:
        """Reflect on recent actions and progress.

        Args:
            actions: Recent actions to reflect on
            context: Full context including task, todos, deliverables, and history
            screenshot: Optional current screenshot
            memory: Optional PlannerMemory for formatting contexts
            todo_index: Optional todo index for formatting internal context
            current_instruction: Current subtask instruction being executed
            reflection_interval: Window size for recent actions/screenshots

        Returns:
            ReflectionOutput with continuation decision and reasoning
        """
        # Ensure we have a client
        client = self._ensure_client()

        # Upload screenshot if provided
        result_screenshot_uuid = None
        if screenshot:
            upload_response = await client.put_s3_presigned_url(screenshot)
            result_screenshot_uuid = upload_response.uuid

        # Extract memory data if provided
        (
            task_description,
            todos,
            history,
            task_execution_summary,
            overall_todo,
        ) = self._extract_memory_data(memory, context, todo_index)

        # Get window of recent actions based on reflection_interval
        window_actions = actions[-reflection_interval:]

        # Convert actions to window_steps format
        window_steps = [
            {
                "step_number": i + 1,
                "action_type": action.action_type,
                "target": action.target or "",
                "reasoning": action.reasoning or "",
            }
            for i, action in enumerate(window_actions)
        ]

        # Extract screenshot UUIDs from window actions
        window_screenshots = [
            action.screenshot_uuid
            for action in window_actions
            if action.screenshot_uuid
        ]

        # Format prior notes from context (still needed as a simple string summary)
        prior_notes = self._format_execution_notes(context)

        # Call OAGI worker
        response = await client.call_worker(
            worker_id="oagi_follow",
            overall_todo=overall_todo,
            task_description=task_description,
            todos=todos,
            history=history,
            current_todo_index=todo_index,
            task_execution_summary=task_execution_summary,
            current_subtask_instruction=current_instruction or "",
            window_steps=window_steps,
            window_screenshots=window_screenshots,
            result_screenshot=result_screenshot_uuid,
            prior_notes=prior_notes,
        )

        # Parse response
        return self._parse_reflection_output(response.response)

    async def summarize(
        self,
        execution_history: list[Action],
        context: dict[str, Any],
        memory: PlannerMemory | None = None,
        todo_index: int | None = None,
    ) -> str:
        """Generate execution summary.

        Args:
            execution_history: Complete execution history
            context: Full context including task, todos, deliverables
            memory: Optional PlannerMemory for formatting contexts
            todo_index: Optional todo index for formatting internal context

        Returns:
            String summary of the execution
        """
        # Ensure we have a client
        client = self._ensure_client()

        # Extract memory data if provided
        (
            task_description,
            todos,
            history,
            task_execution_summary,
            overall_todo,
        ) = self._extract_memory_data(memory, context, todo_index)

        # Extract latest_todo_summary (specific to summarize method)
        if memory and todo_index is not None:
            latest_todo_summary = memory.todo_execution_summaries.get(todo_index, "")
        else:
            latest_todo_summary = ""

        # Call OAGI worker
        response = await client.call_worker(
            worker_id="oagi_task_summary",
            overall_todo=overall_todo,
            task_description=task_description,
            todos=todos,
            history=history,
            current_todo_index=todo_index,
            task_execution_summary=task_execution_summary,
            latest_todo_summary=latest_todo_summary,
        )

        # Parse response and extract summary
        try:
            result = json.loads(response.response)
        except json.JSONDecodeError:
            return response.response
        # Valid JSON may still be a non-dict (e.g. a bare string or list);
        # only dicts can carry the structured "task_summary" field.
        if isinstance(result, dict):
            return result.get("task_summary", response.response)
        return response.response

    def _format_execution_notes(self, context: dict[str, Any]) -> str:
        """Format execution history notes.

        Args:
            context: Context dictionary

        Returns:
            Formatted execution notes (empty string when there is no history)
        """
        if not context.get("history"):
            return ""

        parts = []
        for hist in context["history"]:
            parts.append(
                f"Todo {hist['todo_index']}: {hist['action_count']} actions, "
                f"completed: {hist['completed']}"
            )
            if hist.get("summary"):
                parts.append(f"Summary: {hist['summary']}")

        return "\n".join(parts)

    def _parse_planner_output(self, response: str) -> PlannerOutput:
        """Parse OAGI worker response into structured planner output.

        Args:
            response: Raw string response from OAGI worker (oagi_first)

        Returns:
            Structured PlannerOutput
        """
        try:
            # Try to parse as JSON (oagi_first format)
            # Extract JSON string to handle Markdown code blocks
            json_response = self._extract_json_str(response)
            data = json.loads(json_response)
            # oagi_first returns: {"reasoning": "...", "subtask": "..."}
            return PlannerOutput(
                instruction=data.get("subtask", data.get("instruction", "")),
                reasoning=data.get("reasoning", ""),
                subtodos=data.get(
                    "subtodos", []
                ),  # Not typically returned by oagi_first
            )
        except (json.JSONDecodeError, KeyError):
            # Fallback: return an empty instruction so the caller can detect
            # the parse failure (the raw response is NOT propagated).
            return PlannerOutput(
                instruction="",
                reasoning="Failed to parse structured response",
                subtodos=[],
            )

    def _parse_reflection_output(self, response: str) -> ReflectionOutput:
        """Parse reflection response into structured output.

        Args:
            response: Raw string response from OAGI worker (oagi_follow)

        Returns:
            Structured ReflectionOutput
        """
        try:
            # Try to parse as JSON (oagi_follow format)
            json_response = self._extract_json_str(response)
            data = json.loads(json_response)
            # oagi_follow returns:
            # {"assessment": "...", "summary": "...", "reflection": "...",
            #  "success": "yes" | "no", "subtask_instruction": "..."}

            # Determine if we should continue or pivot
            success = data.get("success", "no") == "yes"
            # Coerce defensively: the worker may return JSON null (or a
            # non-string) for this field, and None.strip() would raise an
            # uncaught AttributeError.
            new_subtask = str(data.get("subtask_instruction") or "").strip()

            # Continue current if success is not achieved and no new subtask provided
            # Pivot if a new subtask instruction is provided
            continue_current = not success and not new_subtask

            return ReflectionOutput(
                continue_current=continue_current,
                new_instruction=new_subtask if new_subtask else None,
                reasoning=data.get("reflection", data.get("reasoning", "")),
                success_assessment=success,
            )
        except (json.JSONDecodeError, KeyError):
            # Fallback: continue with current approach
            return ReflectionOutput(
                continue_current=True,
                new_instruction=None,
                reasoning="Failed to parse reflection response, continuing current approach",
                success_assessment=False,
            )

    def _extract_json_str(self, text: str) -> str:
        """Return the outermost {...} span of *text*, or "" if none is found.

        Strips Markdown code fences and any prose surrounding the JSON
        object; json.loads("") raises JSONDecodeError, which callers catch.
        """
        start = text.find("{")
        end = text.rfind("}") + 1
        if start < 0 or end <= start:
            return ""
        return text[start:end]
|