PyPI - oagi-core - Versions diffs - 0.10.1__py3-none-any.whl - Mend

oagi-core 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

oagi/__init__.py +148 -0
oagi/agent/__init__.py +33 -0
oagi/agent/default.py +124 -0
oagi/agent/factories.py +74 -0
oagi/agent/observer/__init__.py +38 -0
oagi/agent/observer/agent_observer.py +99 -0
oagi/agent/observer/events.py +28 -0
oagi/agent/observer/exporters.py +445 -0
oagi/agent/observer/protocol.py +12 -0
oagi/agent/protocol.py +55 -0
oagi/agent/registry.py +155 -0
oagi/agent/tasker/__init__.py +33 -0
oagi/agent/tasker/memory.py +160 -0
oagi/agent/tasker/models.py +77 -0
oagi/agent/tasker/planner.py +408 -0
oagi/agent/tasker/taskee_agent.py +512 -0
oagi/agent/tasker/tasker_agent.py +324 -0
oagi/cli/__init__.py +11 -0
oagi/cli/agent.py +281 -0
oagi/cli/display.py +56 -0
oagi/cli/main.py +77 -0
oagi/cli/server.py +94 -0
oagi/cli/tracking.py +55 -0
oagi/cli/utils.py +89 -0
oagi/client/__init__.py +12 -0
oagi/client/async_.py +290 -0
oagi/client/base.py +457 -0
oagi/client/sync.py +293 -0
oagi/exceptions.py +118 -0
oagi/handler/__init__.py +24 -0
oagi/handler/_macos.py +55 -0
oagi/handler/async_pyautogui_action_handler.py +44 -0
oagi/handler/async_screenshot_maker.py +47 -0
oagi/handler/pil_image.py +102 -0
oagi/handler/pyautogui_action_handler.py +291 -0
oagi/handler/screenshot_maker.py +41 -0
oagi/logging.py +55 -0
oagi/server/__init__.py +13 -0
oagi/server/agent_wrappers.py +98 -0
oagi/server/config.py +46 -0
oagi/server/main.py +157 -0
oagi/server/models.py +98 -0
oagi/server/session_store.py +116 -0
oagi/server/socketio_server.py +405 -0
oagi/task/__init__.py +21 -0
oagi/task/async_.py +101 -0
oagi/task/async_short.py +76 -0
oagi/task/base.py +157 -0
oagi/task/short.py +76 -0
oagi/task/sync.py +99 -0
oagi/types/__init__.py +50 -0
oagi/types/action_handler.py +30 -0
oagi/types/async_action_handler.py +30 -0
oagi/types/async_image_provider.py +38 -0
oagi/types/image.py +17 -0
oagi/types/image_provider.py +35 -0
oagi/types/models/__init__.py +32 -0
oagi/types/models/action.py +33 -0
oagi/types/models/client.py +68 -0
oagi/types/models/image_config.py +47 -0
oagi/types/models/step.py +17 -0
oagi/types/step_observer.py +93 -0
oagi/types/url.py +3 -0
oagi_core-0.10.1.dist-info/METADATA +245 -0
oagi_core-0.10.1.dist-info/RECORD +68 -0
oagi_core-0.10.1.dist-info/WHEEL +4 -0
oagi_core-0.10.1.dist-info/entry_points.txt +2 -0
oagi_core-0.10.1.dist-info/licenses/LICENSE +21 -0

oagi/agent/tasker/tasker_agent.py ADDED Viewed

@@ -0,0 +1,324 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import logging
+from typing import Any
+from oagi.types import AsyncActionHandler, AsyncImageProvider, AsyncObserver, SplitEvent
+from ..protocol import AsyncAgent
+from .memory import PlannerMemory
+from .models import TodoStatus
+from .planner import Planner
+from .taskee_agent import TaskeeAgent
+logger = logging.getLogger(__name__)
+class TaskerAgent(AsyncAgent):
+    """Hierarchical agent that manages multi-todo workflows.
+    This agent orchestrates the execution of multiple todos by:
+    1. Managing a workflow with todos and deliverables
+    2. Executing todos sequentially using TaskeeAgent
+    3. Tracking progress and updating memory
+    4. Sharing context between todos for informed execution
+    """
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model: str = "lux-actor-1",
+        max_steps: int = 60,
+        temperature: float = 0.5,
+        reflection_interval: int = 4,
+        planner: Planner | None = None,
+        step_observer: AsyncObserver | None = None,
+    ):
+        """Initialize the tasker agent.
+        Args:
+            api_key: OAGI API key
+            base_url: OAGI API base URL
+            model: Model to use for vision tasks
+            max_steps: Maximum steps per todo
+            temperature: Sampling temperature
+            reflection_interval: Actions before reflection
+            planner: Planner for planning and reflection
+            step_observer: Optional observer for step tracking
+        """
+        self.api_key = api_key
+        self.base_url = base_url
+        self.model = model
+        self.max_steps = max_steps
+        self.temperature = temperature
+        self.reflection_interval = reflection_interval
+        self.planner = planner or Planner(api_key=api_key, base_url=base_url)
+        self.step_observer = step_observer
+        # Memory for tracking workflow
+        self.memory = PlannerMemory()
+        # Current execution state
+        self.current_taskee_agent: TaskeeAgent | None = None
+        self.current_todo_index: int = -1
+    def set_task(
+        self,
+        task: str,
+        todos: list[str],
+    ) -> None:
+        """Set the task and todos for the workflow.
+        Args:
+            task: Overall task description
+            todos: List of todo descriptions
+        """
+        self.memory.set_task(task, todos)
+        logger.info(f"Task set with {len(todos)} todos")
+    async def execute(
+        self,
+        instruction: str,
+        action_handler: AsyncActionHandler,
+        image_provider: AsyncImageProvider,
+    ) -> bool:
+        """Execute the multi-todo workflow.
+        This method will execute todos sequentially until all are complete
+        or a failure occurs.
+        Args:
+            instruction: Not used in TaskerAgent
+            action_handler: Handler for executing actions
+            image_provider: Provider for capturing screenshots
+        Returns:
+            True if all todos completed successfully, False otherwise
+        """
+        overall_success = True
+        # Execute todos until none remain
+        while True:
+            # Prepare for next todo
+            todo_info = self._prepare()
+            if todo_info is None:
+                # No more todos to execute
+                logger.info("No more todos to execute")
+                break
+            todo, todo_index = todo_info
+            logger.info(f"Executing todo {todo_index}: {todo.description}")
+            # Emit split event at the start of todo
+            if self.step_observer:
+                await self.step_observer.on_event(
+                    SplitEvent(
+                        label=f"Start of todo {todo_index + 1}: {todo.description}"
+                    )
+                )
+            # Execute the todo
+            success = await self._execute_todo(
+                todo_index,
+                action_handler,
+                image_provider,
+            )
+            # Emit split event after each todo
+            if self.step_observer:
+                await self.step_observer.on_event(
+                    SplitEvent(
+                        label=f"End of todo {todo_index + 1}: {todo.description}"
+                    )
+                )
+            if not success:
+                logger.warning(f"Todo {todo_index} failed")
+                overall_success = False
+                # If todo failed due to exception, it stays IN_PROGRESS
+                # Break to avoid infinite loop re-attempting same todo
+                current_status = self.memory.todos[todo_index].status
+                if current_status == TodoStatus.IN_PROGRESS:
+                    logger.error("Todo failed with exception, stopping execution")
+                    break
+                # Otherwise continue with next todo
+            # Update task execution summary
+            self._update_task_summary()
+        # Log final status
+        status_summary = self.memory.get_todo_status_summary()
+        logger.info(f"Workflow complete. Status summary: {status_summary}")
+        return overall_success
+    def _prepare(self) -> tuple[Any, int] | None:
+        """Prepare for the next todo execution.
+        Returns:
+            Tuple of (todo, index) or None if no todos remain
+        """
+        # Get current todo
+        todo, todo_index = self.memory.get_current_todo()
+        if todo is None:
+            return None
+        # Create taskee agent with external memory
+        self.current_taskee_agent = TaskeeAgent(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            model=self.model,
+            max_steps=self.max_steps,  # Smaller steps per subtask
+            reflection_interval=self.reflection_interval,
+            temperature=self.temperature,
+            planner=self.planner,
+            external_memory=self.memory,  # Share memory with child
+            todo_index=todo_index,  # Pass the todo index
+            step_observer=self.step_observer,  # Pass step observer
+        )
+        self.current_todo_index = todo_index
+        # Update todo status to in_progress if it was pending
+        if todo.status == TodoStatus.PENDING:
+            self.memory.update_todo(todo_index, TodoStatus.IN_PROGRESS)
+        logger.info(f"Prepared taskee agent for todo {todo_index}")
+        return todo, todo_index
+    async def _execute_todo(
+        self,
+        todo_index: int,
+        action_handler: AsyncActionHandler,
+        image_provider: AsyncImageProvider,
+    ) -> bool:
+        """Execute a single todo using the todo agent.
+        Args:
+            todo_index: Index of the todo to execute
+            action_handler: Handler for executing actions
+            image_provider: Provider for capturing screenshots
+        Returns:
+            True if successful, False otherwise
+        """
+        if not self.current_taskee_agent or todo_index < 0:
+            logger.error("No taskee agent prepared")
+            return False
+        todo = self.memory.todos[todo_index]
+        try:
+            # Execute using taskee agent
+            success = await self.current_taskee_agent.execute(
+                todo.description,
+                action_handler,
+                image_provider,
+            )
+            # Get execution results
+            results = self.current_taskee_agent.return_execution_results()
+            # Update memory with results
+            self._update_memory_from_execution(todo_index, results, success)
+            return success
+        except Exception as e:
+            logger.error(f"Error executing todo {todo_index}: {e}")
+            # Mark as in_progress (not completed)
+            self.memory.update_todo(
+                todo_index,
+                TodoStatus.IN_PROGRESS,
+                summary=f"Execution failed: {str(e)}",
+            )
+            return False
+    def _update_memory_from_execution(
+        self,
+        todo_index: int,
+        results: Any,
+        success: bool,
+    ) -> None:
+        """Update memory based on execution results.
+        Args:
+            todo_index: Index of the executed todo
+            results: Execution results from todo agent
+            success: Whether execution was successful
+        """
+        # Update todo status
+        status = TodoStatus.COMPLETED if success else TodoStatus.IN_PROGRESS
+        self.memory.update_todo(
+            todo_index,
+            status,
+            summary=results.summary,
+        )
+        # Add to history
+        self.memory.add_history(
+            todo_index,
+            results.actions,
+            summary=results.summary,
+            completed=success,
+        )
+        # Update task execution summary
+        if success:
+            if self.memory.task_execution_summary:
+                self.memory.task_execution_summary += (
+                    f"\n- Completed todo {todo_index}: {results.summary}"
+                )
+            else:
+                self.memory.task_execution_summary = (
+                    f"- Completed todo {todo_index}: {results.summary}"
+                )
+        logger.info(
+            f"Updated memory for todo {todo_index}: "
+            f"status={status}, actions={len(results.actions)}"
+        )
+    def _update_task_summary(self) -> None:
+        """Update the overall task execution summary."""
+        status_summary = self.memory.get_todo_status_summary()
+        completed = status_summary.get(TodoStatus.COMPLETED, 0)
+        total = len(self.memory.todos)
+        summary_parts = [f"Progress: {completed}/{total} todos completed"]
+        # Add recent completions
+        for history in self.memory.history[-3:]:  # Last 3 entries
+            if history.completed and history.summary:
+                summary_parts.append(
+                    f"- Todo {history.todo_index}: {history.summary[:100]}"
+                )
+        self.memory.task_execution_summary = "\n".join(summary_parts)
+    def get_memory(self) -> PlannerMemory:
+        """Get the current memory state.
+        Returns:
+            Current PlannerMemory instance
+        """
+        return self.memory
+    def append_todo(self, description: str) -> None:
+        """Dynamically append a new todo to the workflow.
+        Args:
+            description: Description of the new todo
+        """
+        self.memory.append_todo(description)
+        logger.info(f"Appended new todo: {description}")

oagi/cli/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from oagi.cli.main import main
+__all__ = ["main"]

oagi/cli/agent.py ADDED Viewed

@@ -0,0 +1,281 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import argparse
+import asyncio
+import os
+import sys
+import time
+import traceback
+from oagi.agent.observer import AsyncAgentObserver
+from oagi.exceptions import check_optional_dependency
+from .display import display_step_table
+from .tracking import StepTracker
+def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
+    agent_parser = subparsers.add_parser("agent", help="Agent execution commands")
+    agent_subparsers = agent_parser.add_subparsers(dest="agent_command", required=True)
+    # agent run command
+    run_parser = agent_subparsers.add_parser(
+        "run", help="Run an agent with the given instruction"
+    )
+    run_parser.add_argument(
+        "instruction", type=str, help="Task instruction for the agent to execute"
+    )
+    run_parser.add_argument(
+        "--model", type=str, help="Model to use (default: lux-actor-1)"
+    )
+    run_parser.add_argument(
+        "--max-steps", type=int, help="Maximum number of steps (default: 20)"
+    )
+    run_parser.add_argument(
+        "--temperature", type=float, help="Sampling temperature (default: 0.5)"
+    )
+    run_parser.add_argument(
+        "--mode",
+        type=str,
+        default="actor",
+        help="Agent mode to use (default: actor). Available modes: actor, planner",
+    )
+    run_parser.add_argument(
+        "--oagi-api-key", type=str, help="OAGI API key (default: OAGI_API_KEY env var)"
+    )
+    run_parser.add_argument(
+        "--oagi-base-url",
+        type=str,
+        help="OAGI base URL (default: https://api.agiopen.org, or OAGI_BASE_URL env var)",
+    )
+    run_parser.add_argument(
+        "--export",
+        type=str,
+        choices=["markdown", "html", "json"],
+        help="Export execution history to file (markdown, html, or json)",
+    )
+    run_parser.add_argument(
+        "--export-file",
+        type=str,
+        help="Output file path for export (default: execution_report.[md|html|json])",
+    )
+    # agent permission command
+    agent_subparsers.add_parser(
+        "permission",
+        help="Check macOS permissions for screen recording and accessibility",
+    )
+def handle_agent_command(args: argparse.Namespace) -> None:
+    if args.agent_command == "run":
+        run_agent(args)
+    elif args.agent_command == "permission":
+        check_permissions()
+def check_permissions() -> None:
+    """Check and request macOS permissions for screen recording and accessibility.
+    Guides the user through granting permissions one at a time.
+    """
+    if sys.platform != "darwin":
+        print("Warning: Permission check is only applicable on macOS.")
+        print("On other platforms, no special permissions are required.")
+        return
+    check_optional_dependency("Quartz", "Permission check", "desktop")
+    check_optional_dependency("ApplicationServices", "Permission check", "desktop")
+    import subprocess  # noqa: PLC0415
+    from ApplicationServices import AXIsProcessTrusted  # noqa: PLC0415
+    from Quartz import (  # noqa: PLC0415
+        CGPreflightScreenCaptureAccess,
+        CGRequestScreenCaptureAccess,
+    )
+    # Check all permissions first to show status
+    screen_recording_granted = CGPreflightScreenCaptureAccess()
+    accessibility_granted = AXIsProcessTrusted()
+    print("Checking permissions...")
+    print(f"  {'[OK]' if screen_recording_granted else '[MISSING]'} Screen Recording")
+    print(f"  {'[OK]' if accessibility_granted else '[MISSING]'} Accessibility")
+    # Guide user through missing permissions one at a time
+    if not screen_recording_granted:
+        CGRequestScreenCaptureAccess()
+        subprocess.run(
+            [
+                "open",
+                "x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture",
+            ],
+            check=False,
+        )
+        print("\nPlease grant Screen Recording permission in System Preferences.")
+        print("After granting, run this command again to continue.")
+        print("Note: You may need to restart your terminal after granting permissions.")
+        sys.exit(1)
+    if not accessibility_granted:
+        subprocess.run(
+            [
+                "open",
+                "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility",
+            ],
+            check=False,
+        )
+        print("\nPlease grant Accessibility permission in System Preferences.")
+        print("After granting, run this command again to continue.")
+        print("Note: You may need to restart your terminal after granting permissions.")
+        sys.exit(1)
+    print()
+    print("All permissions granted. You can run the agent.")
+def _warn_missing_permissions() -> None:
+    if sys.platform != "darwin":
+        return
+    if not check_optional_dependency(
+        "Quartz", "Permission check", "desktop", raise_error=False
+    ):
+        return
+    if not check_optional_dependency(
+        "ApplicationServices", "Permission check", "desktop", raise_error=False
+    ):
+        return
+    from ApplicationServices import AXIsProcessTrusted  # noqa: PLC0415
+    from Quartz import CGPreflightScreenCaptureAccess  # noqa: PLC0415
+    missing = []
+    if not CGPreflightScreenCaptureAccess():
+        missing.append("Screen Recording")
+    if not AXIsProcessTrusted():
+        missing.append("Accessibility")
+    if missing:
+        print(f"Warning: Missing macOS permissions: {', '.join(missing)}")
+        print("Run 'oagi agent permission' to configure permissions.\n")
+def run_agent(args: argparse.Namespace) -> None:
+    # Check if desktop extras are installed
+    check_optional_dependency("pyautogui", "Agent execution", "desktop")
+    check_optional_dependency("PIL", "Agent execution", "desktop")
+    # Warn about missing macOS permissions (non-blocking)
+    _warn_missing_permissions()
+    from oagi import AsyncPyautoguiActionHandler, AsyncScreenshotMaker  # noqa: PLC0415
+    from oagi.agent import create_agent  # noqa: PLC0415
+    # Get configuration
+    api_key = args.oagi_api_key or os.getenv("OAGI_API_KEY")
+    if not api_key:
+        print(
+            "Error: OAGI API key not provided.\n"
+            "Set OAGI_API_KEY environment variable or use --oagi-api-key flag.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    base_url = args.oagi_base_url or os.getenv(
+        "OAGI_BASE_URL", "https://api.agiopen.org"
+    )
+    model = args.model or "lux-actor-1"
+    max_steps = args.max_steps or 20
+    temperature = args.temperature if args.temperature is not None else 0.5
+    mode = args.mode or "actor"
+    export_format = args.export
+    export_file = args.export_file
+    # Create observers
+    step_tracker = StepTracker()
+    agent_observer = AsyncAgentObserver() if export_format else None
+    # Use a combined observer that forwards to both
+    class CombinedObserver:
+        async def on_event(self, event):
+            await step_tracker.on_event(event)
+            if agent_observer:
+                await agent_observer.on_event(event)
+    observer = CombinedObserver()
+    # Create agent with observer
+    agent = create_agent(
+        mode=mode,
+        api_key=api_key,
+        base_url=base_url,
+        model=model,
+        max_steps=max_steps,
+        temperature=temperature,
+        step_observer=observer,
+    )
+    # Create handlers
+    action_handler = AsyncPyautoguiActionHandler()
+    image_provider = AsyncScreenshotMaker()
+    print(f"Starting agent with instruction: {args.instruction}")
+    print(
+        f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, Temperature: {temperature}"
+    )
+    print("-" * 60)
+    start_time = time.time()
+    success = False
+    interrupted = False
+    try:
+        success = asyncio.run(
+            agent.execute(
+                instruction=args.instruction,
+                action_handler=action_handler,
+                image_provider=image_provider,
+            )
+        )
+    except KeyboardInterrupt:
+        print("\nAgent execution interrupted by user (Ctrl+C)")
+        interrupted = True
+    except Exception as e:
+        print(f"\nError during agent execution: {e}", file=sys.stderr)
+        traceback.print_exc()
+    finally:
+        duration = time.time() - start_time
+        if step_tracker.steps:
+            print("\n" + "=" * 60)
+            display_step_table(step_tracker.steps, success, duration)
+        else:
+            print("\nNo steps were executed.")
+        # Export if requested
+        if export_format and agent_observer:
+            # Determine output file path
+            if export_file:
+                output_path = export_file
+            else:
+                ext_map = {"markdown": "md", "html": "html", "json": "json"}
+                output_path = f"execution_report.{ext_map[export_format]}"
+            try:
+                agent_observer.export(export_format, output_path)
+                print(f"\nExecution history exported to: {output_path}")
+            except Exception as e:
+                print(f"\nError exporting execution history: {e}", file=sys.stderr)
+        if interrupted:
+            sys.exit(130)
+        elif not success:
+            sys.exit(1)

oagi/cli/display.py ADDED Viewed

@@ -0,0 +1,56 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from rich.console import Console
+from rich.table import Table
+from .tracking import StepData
+def display_step_table(
+    steps: list[StepData], success: bool, duration: float | None = None
+):
+    console = Console()
+    table = Table(title="Agent Execution Summary", show_lines=True)
+    table.add_column("Step", justify="center", style="cyan", width=6)
+    table.add_column("Reasoning", style="white")
+    table.add_column("Actions", style="yellow", width=35)
+    table.add_column("Status", justify="center", width=8)
+    for step in steps:
+        reason = step.reasoning or "N/A"
+        actions_display = []
+        for action in step.actions[:3]:
+            arg = action.argument[:20] if action.argument else ""
+            actions_display.append(f"{action.type.value}({arg})")
+        actions_str = ", ".join(actions_display)
+        if len(step.actions) > 3:
+            actions_str += f" (+{len(step.actions) - 3} more)"
+        status_display = "✓" if step.status == "complete" else "→"
+        table.add_row(
+            str(step.step_num),
+            reason,
+            actions_str,
+            status_display,
+        )
+    console.print(table)
+    status_text = "Success" if success else "Failed/Interrupted"
+    console.print(
+        f"\nTotal Steps: {len(steps)} | Status: {status_text}",
+        style="bold",
+    )
+    if duration:
+        console.print(f"Duration: {duration:.2f}s")