PyPI - oagi-core - Versions diffs - 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

oagi/__init__.py +76 -33
oagi/agent/__init__.py +2 -0
oagi/agent/default.py +45 -12
oagi/agent/factories.py +22 -3
oagi/agent/observer/__init__.py +38 -0
oagi/agent/observer/agent_observer.py +99 -0
oagi/agent/observer/events.py +28 -0
oagi/agent/observer/exporters.py +445 -0
oagi/agent/observer/protocol.py +12 -0
oagi/agent/registry.py +2 -2
oagi/agent/tasker/models.py +1 -0
oagi/agent/tasker/planner.py +41 -9
oagi/agent/tasker/taskee_agent.py +178 -86
oagi/agent/tasker/tasker_agent.py +25 -14
oagi/cli/agent.py +50 -9
oagi/cli/tracking.py +27 -17
oagi/cli/utils.py +11 -4
oagi/client/base.py +3 -7
oagi/handler/_macos.py +55 -0
oagi/handler/pyautogui_action_handler.py +19 -2
oagi/server/agent_wrappers.py +5 -5
oagi/server/config.py +3 -3
oagi/server/models.py +2 -2
oagi/server/session_store.py +2 -2
oagi/server/socketio_server.py +1 -1
oagi/task/async_.py +13 -34
oagi/task/async_short.py +2 -2
oagi/task/base.py +41 -7
oagi/task/short.py +2 -2
oagi/task/sync.py +11 -34
oagi/types/__init__.py +24 -4
oagi/types/async_image_provider.py +3 -2
oagi/types/image_provider.py +3 -2
oagi/types/step_observer.py +75 -16
oagi/types/url.py +3 -0
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/METADATA +38 -25
oagi_core-0.10.0.dist-info/RECORD +68 -0
oagi/types/url_image.py +0 -47
oagi_core-0.9.1.dist-info/RECORD +0 -62
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/WHEEL +0 -0
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/entry_points.txt +0 -0
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/licenses/LICENSE +0 -0

oagi/agent/observer/exporters.py ADDED Viewed

@@ -0,0 +1,445 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import base64
+import json
+from pathlib import Path
+from ...types import (
+    ActionEvent,
+    ImageEvent,
+    LogEvent,
+    ObserverEvent,
+    PlanEvent,
+    SplitEvent,
+    StepEvent,
+)
+def export_to_markdown(
+    events: list[ObserverEvent],
+    path: str,
+    images_dir: str | None = None,
+) -> None:
+    """Export events to a Markdown file.
+    The report is written as UTF-8. Screenshots supplied as bytes are saved
+    under ``images_dir`` and linked relative to the report's own directory;
+    screenshots supplied as strings are listed as URLs. ``ImageEvent``
+    entries produce no output.
+    Args:
+        events: List of events to export.
+        path: Path to the output Markdown file.
+        images_dir: Directory to save images. If None, images are not saved.
+    """
+    import os
+    output_path = Path(path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    images_path = Path(images_dir) if images_dir else None
+    if images_path is not None:
+        images_path.mkdir(parents=True, exist_ok=True)
+    def _save_image(data: bytes, filename: str) -> str:
+        # Write the screenshot and return a link that resolves from the report.
+        target = images_path / filename
+        target.write_bytes(data)
+        try:
+            link = os.path.relpath(target, output_path.parent)
+        except ValueError:
+            # No relative path exists (e.g. different Windows drives); fall
+            # back to "<images dir name>/<file>".
+            link = os.path.join(images_path.name, filename)
+        # Markdown links use forward slashes on every platform.
+        return link.replace(os.sep, "/")
+    def _quote(text: str) -> str:
+        # Prefix every line so multi-paragraph text stays inside the blockquote.
+        return "\n".join(f"> {line}" for line in text.splitlines())
+    phase_titles = {
+        "initial": "Initial Planning",
+        "reflection": "Reflection",
+        "summary": "Summary",
+    }
+    lines: list[str] = ["# Agent Execution Report\n"]
+    image_counter = 0
+    for event in events:
+        timestamp = event.timestamp.strftime("%H:%M:%S")
+        match event:
+            case StepEvent():
+                lines.append(f"\n## Step {event.step_num}\n")
+                lines.append(f"**Time:** {timestamp}\n")
+                if isinstance(event.image, bytes):
+                    if images_path is not None:
+                        # Step images count too: plan image names use the running total.
+                        image_counter += 1
+                        link = _save_image(event.image, f"step_{event.step_num}.png")
+                        lines.append(f"\n![Step {event.step_num}]({link})\n")
+                    else:
+                        lines.append(
+                            f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
+                        )
+                elif isinstance(event.image, str):
+                    lines.append(f"\n**Screenshot URL:** {event.image}\n")
+                if event.step.reason:
+                    lines.append(f"\n**Reasoning:**\n{_quote(event.step.reason)}\n")
+                if event.step.actions:
+                    lines.append("\n**Planned Actions:**\n")
+                    for action in event.step.actions:
+                        count_str = (
+                            f" (x{action.count})"
+                            if action.count and action.count > 1
+                            else ""
+                        )
+                        lines.append(
+                            f"- `{action.type.value}`: {action.argument}{count_str}\n"
+                        )
+                if event.step.stop:
+                    lines.append("\n**Status:** Task Complete\n")
+            case ActionEvent():
+                lines.append(f"\n### Actions Executed ({timestamp})\n")
+                if event.error:
+                    lines.append(f"\n**Error:** {event.error}\n")
+                else:
+                    lines.append("\n**Result:** Success\n")
+            case LogEvent():
+                log_text = _quote(f"**Log ({timestamp}):** {event.message}")
+                lines.append(f"\n{log_text}\n")
+            case SplitEvent():
+                if event.label:
+                    lines.append(f"\n---\n\n### {event.label}\n")
+                else:
+                    lines.append("\n---\n")
+            case ImageEvent():
+                pass
+            case PlanEvent():
+                phase_title = phase_titles.get(event.phase, event.phase.capitalize())
+                lines.append(f"\n### {phase_title} ({timestamp})\n")
+                if event.image:
+                    if isinstance(event.image, bytes):
+                        if images_path is not None:
+                            image_counter += 1
+                            link = _save_image(
+                                event.image,
+                                f"plan_{event.phase}_{image_counter}.png",
+                            )
+                            lines.append(f"\n![{phase_title}]({link})\n")
+                        else:
+                            lines.append(
+                                f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
+                            )
+                    elif isinstance(event.image, str):
+                        lines.append(f"\n**Screenshot URL:** {event.image}\n")
+                if event.reasoning:
+                    lines.append(f"\n**Reasoning:**\n{_quote(event.reasoning)}\n")
+                if event.result:
+                    lines.append(f"\n**Result:** {event.result}\n")
+    # Explicit UTF-8 so the report does not depend on the platform's locale.
+    output_path.write_text("".join(lines), encoding="utf-8")
+def export_to_html(events: list[ObserverEvent], path: str) -> None:
+    """Export events to a self-contained HTML file.
+    Screenshots supplied as bytes are embedded as base64 ``data:`` URIs;
+    screenshots supplied as strings are rendered as links. Every
+    event-supplied text value, including screenshot URLs and plan phase
+    names, is HTML-escaped before insertion. The file is written as UTF-8 to
+    match the charset declared in the page header. ``ImageEvent`` entries
+    produce no output.
+    Args:
+        events: List of events to export.
+        path: Path to the output HTML file.
+    """
+    output_path = Path(path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    phase_titles = {
+        "initial": "Initial Planning",
+        "reflection": "Reflection",
+        "summary": "Summary",
+    }
+    html_parts: list[str] = [_get_html_header()]
+    for event in events:
+        timestamp = event.timestamp.strftime("%H:%M:%S")
+        match event:
+            case StepEvent():
+                html_parts.append('<div class="step">')
+                html_parts.append(f"<h2>Step {event.step_num}</h2>")
+                html_parts.append(f'<span class="timestamp">{timestamp}</span>')
+                if isinstance(event.image, bytes):
+                    b64_image = base64.b64encode(event.image).decode("utf-8")
+                    html_parts.append(
+                        f'<img src="data:image/png;base64,{b64_image}" '
+                        f'alt="Step {event.step_num}" class="screenshot"/>'
+                    )
+                elif isinstance(event.image, str):
+                    # Escape the URL: it lands in an attribute and in element text.
+                    safe_url = _escape_html(event.image)
+                    html_parts.append(
+                        f'<p class="url">Screenshot URL: <a href="{safe_url}">{safe_url}</a></p>'
+                    )
+                if event.step.reason:
+                    html_parts.append('<div class="reasoning">')
+                    html_parts.append(
+                        f"<strong>Reasoning:</strong><p>{_escape_html(event.step.reason)}</p>"
+                    )
+                    html_parts.append("</div>")
+                if event.step.actions:
+                    html_parts.append('<div class="actions">')
+                    html_parts.append("<strong>Planned Actions:</strong><ul>")
+                    for action in event.step.actions:
+                        count_str = (
+                            f" (x{action.count})"
+                            if action.count and action.count > 1
+                            else ""
+                        )
+                        html_parts.append(
+                            f"<li><code>{action.type.value}</code>: "
+                            f"{_escape_html(action.argument)}{count_str}</li>"
+                        )
+                    html_parts.append("</ul></div>")
+                if event.step.stop:
+                    html_parts.append('<div class="complete">Task Complete</div>')
+                html_parts.append("</div>")
+            case ActionEvent():
+                html_parts.append('<div class="action-result">')
+                html_parts.append(f'<span class="timestamp">{timestamp}</span>')
+                if event.error:
+                    html_parts.append(
+                        f'<div class="error">Error: {_escape_html(event.error)}</div>'
+                    )
+                else:
+                    html_parts.append(
+                        '<div class="success">Actions executed successfully</div>'
+                    )
+                html_parts.append("</div>")
+            case LogEvent():
+                html_parts.append('<div class="log">')
+                html_parts.append(f'<span class="timestamp">{timestamp}</span>')
+                html_parts.append(f"<p>{_escape_html(event.message)}</p>")
+                html_parts.append("</div>")
+            case SplitEvent():
+                if event.label:
+                    html_parts.append(
+                        f'<div class="split"><h3>{_escape_html(event.label)}</h3></div>'
+                    )
+                else:
+                    html_parts.append('<hr class="split-line"/>')
+            case ImageEvent():
+                pass
+            case PlanEvent():
+                # Unknown phases fall back to event-supplied text, so escape
+                # the title before using it in markup and in the alt attribute.
+                phase_title = _escape_html(
+                    phase_titles.get(event.phase, event.phase.capitalize())
+                )
+                html_parts.append('<div class="plan">')
+                html_parts.append(f"<h3>{phase_title}</h3>")
+                html_parts.append(f'<span class="timestamp">{timestamp}</span>')
+                if event.image:
+                    if isinstance(event.image, bytes):
+                        b64_image = base64.b64encode(event.image).decode("utf-8")
+                        html_parts.append(
+                            f'<img src="data:image/png;base64,{b64_image}" '
+                            f'alt="{phase_title}" class="screenshot"/>'
+                        )
+                    elif isinstance(event.image, str):
+                        safe_url = _escape_html(event.image)
+                        html_parts.append(
+                            f'<p class="url">Screenshot URL: '
+                            f'<a href="{safe_url}">{safe_url}</a></p>'
+                        )
+                if event.reasoning:
+                    html_parts.append('<div class="reasoning">')
+                    html_parts.append(
+                        f"<strong>Reasoning:</strong><p>{_escape_html(event.reasoning)}</p>"
+                    )
+                    html_parts.append("</div>")
+                if event.result:
+                    html_parts.append(
+                        f'<div class="plan-result"><strong>Result:</strong> '
+                        f"{_escape_html(event.result)}</div>"
+                    )
+                html_parts.append("</div>")
+    html_parts.append(_get_html_footer())
+    # The header declares charset UTF-8, so write the bytes as UTF-8 too.
+    output_path.write_text("".join(html_parts), encoding="utf-8")
+def _escape_html(text: str) -> str:
+    """Return ``text`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by HTML entities."""
+    entities = {
+        "&": "&amp;",
+        "<": "&lt;",
+        ">": "&gt;",
+        '"': "&quot;",
+        "'": "&#39;",
+    }
+    # One pass over the input, so already-produced entities are never re-escaped.
+    return text.translate(str.maketrans(entities))
+def _get_html_header() -> str:
+    """Get HTML document header with CSS styles.
+    Returns:
+        The opening part of the report page: the doctype, a ``<head>`` with
+        the inline stylesheet, the opening ``<body>`` tag and the
+        ``<h1>`` report title. The markup is left open; ``_get_html_footer``
+        returns the matching closing tags.
+    """
+    return """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Agent Execution Report</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 20px;
+            background: #f5f5f5;
+        }
+        h1 {
+            color: #333;
+            border-bottom: 2px solid #007bff;
+            padding-bottom: 10px;
+        }
+        .step {
+            background: white;
+            border-radius: 8px;
+            padding: 20px;
+            margin: 20px 0;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        .step h2 {
+            margin-top: 0;
+            color: #007bff;
+        }
+        .timestamp {
+            color: #666;
+            font-size: 0.9em;
+        }
+        .screenshot {
+            max-width: 100%;
+            border: 1px solid #ddd;
+            border-radius: 4px;
+            margin: 10px 0;
+        }
+        .reasoning {
+            background: #f8f9fa;
+            padding: 10px;
+            border-left: 3px solid #007bff;
+            margin: 10px 0;
+        }
+        .actions {
+            margin: 10px 0;
+        }
+        .actions ul {
+            margin: 5px 0;
+            padding-left: 20px;
+        }
+        .actions code {
+            background: #e9ecef;
+            padding: 2px 6px;
+            border-radius: 3px;
+        }
+        .complete {
+            background: #d4edda;
+            color: #155724;
+            padding: 10px;
+            border-radius: 4px;
+            margin-top: 10px;
+        }
+        .action-result {
+            padding: 10px;
+            margin: 5px 0;
+        }
+        .success {
+            color: #155724;
+        }
+        .error {
+            color: #721c24;
+            background: #f8d7da;
+            padding: 10px;
+            border-radius: 4px;
+        }
+        .log {
+            background: #fff3cd;
+            padding: 10px;
+            margin: 10px 0;
+            border-radius: 4px;
+        }
+        .split {
+            text-align: center;
+            margin: 30px 0;
+        }
+        .split h3 {
+            color: #666;
+        }
+        .split-line {
+            border: none;
+            border-top: 2px dashed #ccc;
+            margin: 30px 0;
+        }
+        .url {
+            word-break: break-all;
+        }
+        .plan {
+            background: #e7f3ff;
+            border-radius: 8px;
+            padding: 20px;
+            margin: 20px 0;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        .plan h3 {
+            margin-top: 0;
+            color: #0056b3;
+        }
+        .plan-result {
+            background: #d1ecf1;
+            color: #0c5460;
+            padding: 10px;
+            border-radius: 4px;
+            margin-top: 10px;
+        }
+    </style>
+</head>
+<body>
+    <h1>Agent Execution Report</h1>
+"""
+def _get_html_footer() -> str:
+    """Return the closing ``</body>`` and ``</html>`` tags that end the report page."""
+    closing_markup = "\n</body>\n</html>\n"
+    return closing_markup
+def export_to_json(events: list[ObserverEvent], path: str) -> None:
+    """Export events to a JSON file.
+    Every event is dumped in JSON mode, so nested values are serialised the
+    same way whether or not the event carries a screenshot. A screenshot
+    held as bytes is stored as base64 text under ``image``, with
+    ``image_encoding`` set to ``"base64"``. The file is written as UTF-8.
+    Args:
+        events: List of events to export.
+        path: Path to the output JSON file.
+    """
+    output_path = Path(path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    json_events = []
+    for event in events:
+        image = getattr(event, "image", None)
+        if isinstance(image, bytes):
+            # JSON-mode dumping would try to decode the raw bytes as UTF-8,
+            # so leave the image out of the dump and attach it as base64.
+            event_dict = event.model_dump(mode="json", exclude={"image"})
+            event_dict["image"] = base64.b64encode(image).decode("utf-8")
+            event_dict["image_encoding"] = "base64"
+        else:
+            event_dict = event.model_dump(mode="json")
+        json_events.append(event_dict)
+    # default=str is kept as a safety net for any value JSON mode leaves unconverted.
+    output_path.write_text(
+        json.dumps(json_events, indent=2, default=str), encoding="utf-8"
+    )

oagi/agent/observer/protocol.py ADDED Viewed

@@ -0,0 +1,12 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+# Re-export from types for convenience
+from ...types import AsyncObserver
+__all__ = ["AsyncObserver"]

oagi/agent/registry.py CHANGED Viewed

@@ -91,7 +91,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
     Standard parameters typically include:
     - api_key: OAGI API key
     - base_url: OAGI API base URL
-    - model: Model identifier (e.g., "lux-v1")
+    - model: Model identifier (e.g., "lux-actor-1")
     - max_steps: Maximum number of steps to execute
     - temperature: Sampling temperature
@@ -111,7 +111,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
             mode="actor",
             api_key="...",
             base_url="...",
-            model="lux-v1",
+            model="lux-actor-1",
             max_steps=30,
             temperature=0.0,
         )

oagi/agent/tasker/models.py CHANGED Viewed

@@ -44,6 +44,7 @@ class Action(BaseModel):
     details: dict[str, Any] = Field(default_factory=dict)
     reasoning: str | None = None
     result: str | None = None
+    screenshot_uuid: str | None = None  # UUID of uploaded screenshot for this action
 class TodoHistory(BaseModel):

oagi/agent/tasker/planner.py CHANGED Viewed

@@ -10,6 +10,7 @@ import json
 from typing import Any
 from ...client import AsyncClient
+from ...types import URL, Image
 from .memory import PlannerMemory
 from .models import Action, PlannerOutput, ReflectionOutput
@@ -20,19 +21,28 @@ class Planner:
     This class provides planning and reflection capabilities using OAGI workers.
     """
-    def __init__(self, client: AsyncClient | None = None):
+    def __init__(
+        self,
+        client: AsyncClient | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
+    ):
         """Initialize the planner.
         Args:
             client: AsyncClient for OAGI API calls. If None, one will be created when needed.
+            api_key: API key for creating internal client
+            base_url: Base URL for creating internal client
         """
         self.client = client
+        self.api_key = api_key
+        self.base_url = base_url
         self._owns_client = False  # Track if we created the client
     def _ensure_client(self) -> AsyncClient:
         """Ensure we have a client, creating one if needed."""
         if not self.client:
-            self.client = AsyncClient()
+            self.client = AsyncClient(api_key=self.api_key, base_url=self.base_url)
             self._owns_client = True
         return self.client
@@ -111,7 +121,7 @@ class Planner:
         self,
         todo: str,
         context: dict[str, Any],
-        screenshot: bytes | None = None,
+        screenshot: Image | URL | None = None,
         memory: PlannerMemory | None = None,
         todo_index: int | None = None,
     ) -> PlannerOutput:
@@ -166,10 +176,11 @@ class Planner:
         self,
         actions: list[Action],
         context: dict[str, Any],
-        screenshot: bytes | None = None,
+        screenshot: Image | URL | None = None,
         memory: PlannerMemory | None = None,
         todo_index: int | None = None,
         current_instruction: str | None = None,
+        reflection_interval: int = 4,
     ) -> ReflectionOutput:
         """Reflect on recent actions and progress.
@@ -180,6 +191,7 @@ class Planner:
             memory: Optional PlannerMemory for formatting contexts
             todo_index: Optional todo index for formatting internal context
             current_instruction: Current subtask instruction being executed
+            reflection_interval: Window size for recent actions/screenshots
         Returns:
             ReflectionOutput with continuation decision and reasoning
@@ -203,6 +215,9 @@ class Planner:
             overall_todo,
         ) = self._extract_memory_data(memory, context, todo_index)
+        # Get window of recent actions based on reflection_interval
+        window_actions = actions[-reflection_interval:]
         # Convert actions to window_steps format
         window_steps = [
             {
@@ -211,7 +226,14 @@ class Planner:
                 "target": action.target or "",
                 "reasoning": action.reasoning or "",
             }
-            for i, action in enumerate(actions[-10:])  # Last 10 actions
+            for i, action in enumerate(window_actions)
+        ]
+        # Extract screenshot UUIDs from window actions
+        window_screenshots = [
+            action.screenshot_uuid
+            for action in window_actions
+            if action.screenshot_uuid
         ]
         # Format prior notes from context (still needed as a simple string summary)
@@ -229,7 +251,7 @@ class Planner:
             task_execution_summary=task_execution_summary,
             current_subtask_instruction=current_instruction or "",
             window_steps=window_steps,
-            window_screenshots=[],  # Could be populated if we track screenshot history
+            window_screenshots=window_screenshots,
             result_screenshot=result_screenshot_uuid,
             prior_notes=prior_notes,
         )
@@ -328,7 +350,9 @@ class Planner:
         """
         try:
             # Try to parse as JSON (oagi_first format)
-            data = json.loads(response)
+            # Extract JSON string to handle Markdown code blocks
+            json_response = self._extract_json_str(response)
+            data = json.loads(json_response)
             # oagi_first returns: {"reasoning": "...", "subtask": "..."}
             return PlannerOutput(
                 instruction=data.get("subtask", data.get("instruction", "")),
@@ -340,7 +364,7 @@ class Planner:
         except (json.JSONDecodeError, KeyError):
             # Fallback: parsing failed, so return an empty instruction
             return PlannerOutput(
-                instruction=response,
+                instruction="",
                 reasoning="Failed to parse structured response",
                 subtodos=[],
             )
@@ -356,7 +380,8 @@ class Planner:
         """
         try:
             # Try to parse as JSON (oagi_follow format)
-            data = json.loads(response)
+            json_response = self._extract_json_str(response)
+            data = json.loads(json_response)
             # oagi_follow returns:
             # {"assessment": "...", "summary": "...", "reflection": "...",
             #  "success": "yes" | "no", "subtask_instruction": "..."}
@@ -383,3 +408,10 @@ class Planner:
                 reasoning="Failed to parse reflection response, continuing current approach",
                 success_assessment=False,
             )
+    def _extract_json_str(self, text: str) -> str:
+        """Return the span from the first ``{`` to the last ``}`` in ``text``.
+        Used to pull a JSON object out of a response that may wrap it in
+        other text such as a Markdown code fence. Returns an empty string
+        when no such span exists.
+        """
+        opening = text.find("{")
+        closing = text.rfind("}")
+        # find/rfind return -1 when absent; a "}" before the first "{" is no match either.
+        if opening == -1 or closing < opening:
+            return ""
+        return text[opening : closing + 1]

oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl