oagi-core 0.11.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +1 -3
- oagi/actor/__init__.py +21 -0
- oagi/{task → actor}/async_.py +23 -7
- oagi/{task → actor}/async_short.py +1 -1
- oagi/actor/base.py +222 -0
- oagi/{task → actor}/short.py +1 -1
- oagi/{task → actor}/sync.py +21 -5
- oagi/agent/default.py +1 -0
- oagi/agent/observer/exporters.py +6 -0
- oagi/agent/observer/report_template.html +19 -0
- oagi/agent/tasker/planner.py +14 -12
- oagi/agent/tasker/taskee_agent.py +7 -3
- oagi/client/async_.py +54 -96
- oagi/client/base.py +81 -133
- oagi/client/sync.py +52 -99
- oagi/constants.py +7 -2
- oagi/task/__init__.py +22 -8
- oagi/types/models/__init__.py +0 -2
- oagi/types/models/action.py +4 -1
- oagi/types/models/client.py +1 -17
- oagi/types/step_observer.py +2 -0
- oagi/utils/__init__.py +12 -0
- oagi/utils/output_parser.py +166 -0
- oagi/utils/prompt_builder.py +44 -0
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.0.dist-info}/METADATA +57 -10
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.0.dist-info}/RECORD +29 -25
- oagi/task/base.py +0 -158
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.11.0.dist-info → oagi_core-0.12.0.dist-info}/licenses/LICENSE +0 -0
oagi/__init__.py
CHANGED
@@ -8,6 +8,7 @@
 import importlib
 from typing import TYPE_CHECKING
 
+from oagi.actor import Actor, AsyncActor, AsyncShortTask, AsyncTask, ShortTask, Task
 from oagi.client import AsyncClient, SyncClient
 from oagi.exceptions import (
     APIError,
@@ -22,13 +23,11 @@ from oagi.exceptions import (
     ValidationError,
     check_optional_dependency,
 )
-from oagi.task import Actor, AsyncActor, AsyncShortTask, AsyncTask, ShortTask, Task
 from oagi.types import ImageConfig
 from oagi.types.models import (
     ErrorDetail,
     ErrorResponse,
     GenerateResponse,
-    LLMResponse,
     UploadFileResponse,
 )
 
@@ -116,7 +115,6 @@ __all__ = [
     # Configuration
     "ImageConfig",
     # Response models
-    "LLMResponse",
     "GenerateResponse",
     "UploadFileResponse",
     "ErrorResponse",

oagi/actor/__init__.py
ADDED
@@ -0,0 +1,21 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) OpenAGI Foundation
+# All rights reserved.
+#
+# This file is part of the official API project.
+# Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+
+from .async_ import AsyncActor, AsyncTask
+from .async_short import AsyncShortTask
+from .short import ShortTask
+from .sync import Actor, Task
+
+__all__ = [
+    "Actor",
+    "AsyncActor",
+    "Task",  # Deprecated: Use Actor instead
+    "AsyncTask",  # Deprecated: Use AsyncActor instead
+    "ShortTask",  # Deprecated
+    "AsyncShortTask",  # Deprecated
+]

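For orientation, a minimal sketch of what this relocation means for imports (assuming only the re-exports shown in the oagi/__init__.py and oagi/actor/__init__.py hunks above; the deprecation notes come from the __all__ comments):

```python
# New canonical location introduced in 0.12.0.
from oagi.actor import Actor, AsyncActor

# Top-level imports keep working: oagi/__init__.py now re-exports these names
# from oagi.actor instead of oagi.task (see the first hunk of this diff).
from oagi import ShortTask, Task  # Task and ShortTask are flagged as deprecated
```
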
oagi/{task → actor}/async_.py
RENAMED
@@ -10,9 +10,12 @@ import warnings
 
 from ..client import AsyncClient
 from ..constants import DEFAULT_MAX_STEPS, MODEL_ACTOR
+from ..logging import get_logger
 from ..types import URL, Image, Step
 from .base import BaseActor
 
+logger = get_logger("actor.async")
+
 
 class AsyncActor(BaseActor):
     """Async base class for task automation with the OAGI API."""
@@ -51,20 +54,33 @@
         """Send screenshot to the server and get the next actions.
 
         Args:
-            screenshot: Screenshot as Image object or raw bytes
-            instruction: Optional additional instruction for this step
+            screenshot: Screenshot as Image object, URL string, or raw bytes
+            instruction: Optional additional instruction for this step (currently unused)
             temperature: Sampling temperature for this step (overrides task default if provided)
 
         Returns:
             Step: The actions and reasoning for this step
         """
-
-
-        )
+        self._validate_and_increment_step()
+        self._log_step_execution(prefix="async ")
 
         try:
-
-
+            screenshot_url = await self._ensure_screenshot_url_async(
+                screenshot, self.client
+            )
+            self._add_user_message_to_history(screenshot_url, self._build_step_prompt())
+
+            step, raw_output, usage = await self.client.chat_completion(
+                model=self.model,
+                messages=self.message_history,
+                temperature=self._get_temperature(temperature),
+                task_id=self.task_id,
+            )
+
+            self._add_assistant_message_to_history(raw_output)
+            self._log_step_completion(step, prefix="Async ")
+            return step
+
         except Exception as e:
             self._handle_step_error(e, prefix="async ")
 

oagi/{task → actor}/async_short.py
RENAMED

@@ -14,7 +14,7 @@ from ..types import AsyncActionHandler, AsyncImageProvider
 from .async_ import AsyncActor
 from .base import BaseAutoMode
 
-logger = get_logger("
+logger = get_logger("async_short_actor")
 
 
 class AsyncShortTask(AsyncActor, BaseAutoMode):

oagi/actor/base.py
ADDED
@@ -0,0 +1,222 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) OpenAGI Foundation
+# All rights reserved.
+#
+# This file is part of the official API project.
+# Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+
+from uuid import uuid4
+
+from ..constants import (
+    DEFAULT_MAX_STEPS,
+    MAX_STEPS_ACTOR,
+    MAX_STEPS_THINKER,
+    MODEL_THINKER,
+)
+from ..logging import get_logger
+from ..types import URL, Image, Step
+from ..utils.prompt_builder import build_prompt
+
+logger = get_logger("actor.base")
+
+
+class BaseActor:
+    """Base class with shared task management logic for sync/async actors."""
+
+    def __init__(
+        self,
+        api_key: str | None,
+        base_url: str | None,
+        model: str,
+        temperature: float | None,
+    ):
+        self.task_id: str = uuid4().hex  # Client-side generated UUID
+        self.task_description: str | None = None
+        self.model = model
+        self.temperature = temperature
+        self.message_history: list = []  # OpenAI-compatible message history
+        self.max_steps: int = DEFAULT_MAX_STEPS
+        self.current_step: int = 0  # Current step counter
+        # Client will be set by subclasses
+        self.api_key: str | None = None
+        self.base_url: str | None = None
+
+    def _validate_max_steps(self, max_steps: int) -> int:
+        """Validate and cap max_steps based on model type.
+
+        Args:
+            max_steps: Requested maximum number of steps
+
+        Returns:
+            Validated max_steps (capped to model limit if exceeded)
+        """
+        limit = MAX_STEPS_THINKER if self.model == MODEL_THINKER else MAX_STEPS_ACTOR
+        if max_steps > limit:
+            logger.warning(
+                f"max_steps ({max_steps}) exceeds limit for model '{self.model}'. "
+                f"Capping to {limit}."
+            )
+            return limit
+        return max_steps
+
+    def _prepare_init_task(
+        self,
+        task_desc: str,
+        max_steps: int,
+    ):
+        """Prepare task initialization.
+
+        Args:
+            task_desc: Task description
+            max_steps: Maximum number of steps
+        """
+        self.task_id = uuid4().hex
+        self.task_description = task_desc
+        self.message_history = []
+        self.max_steps = self._validate_max_steps(max_steps)
+        self.current_step = 0
+        logger.info(f"Task initialized: '{task_desc}' (max_steps: {self.max_steps})")
+
+    def _validate_and_increment_step(self):
+        if not self.task_description:
+            raise ValueError("Task description must be set. Call init_task() first.")
+        if self.current_step >= self.max_steps:
+            raise ValueError(
+                f"Max steps limit ({self.max_steps}) reached. "
+                "Call init_task() to start a new task."
+            )
+        self.current_step += 1
+
+    def _get_temperature(self, temperature: float | None) -> float | None:
+        return temperature if temperature is not None else self.temperature
+
+    def _prepare_screenshot(self, screenshot: Image | bytes) -> bytes:
+        if isinstance(screenshot, Image):
+            return screenshot.read()
+        return screenshot
+
+    def _get_screenshot_url(self, screenshot: Image | URL | bytes) -> str | None:
+        """Get screenshot URL if it's a string, otherwise return None."""
+        if isinstance(screenshot, str):
+            return screenshot
+        return None
+
+    def _ensure_screenshot_url_sync(
+        self, screenshot: Image | URL | bytes, client
+    ) -> str:
+        """Get screenshot URL, uploading to S3 if needed (sync version).
+
+        Args:
+            screenshot: Screenshot as Image object, URL string, or raw bytes
+            client: SyncClient instance for S3 upload
+
+        Returns:
+            Screenshot URL (either direct or from S3 upload)
+        """
+        screenshot_url = self._get_screenshot_url(screenshot)
+        if screenshot_url is None:
+            screenshot_bytes = self._prepare_screenshot(screenshot)
+            upload_response = client.put_s3_presigned_url(screenshot_bytes)
+            screenshot_url = upload_response.download_url
+        return screenshot_url
+
+    async def _ensure_screenshot_url_async(
+        self, screenshot: Image | URL | bytes, client
+    ) -> str:
+        """Get screenshot URL, uploading to S3 if needed (async version).
+
+        Args:
+            screenshot: Screenshot as Image object, URL string, or raw bytes
+            client: AsyncClient instance for S3 upload
+
+        Returns:
+            Screenshot URL (either direct or from S3 upload)
+        """
+        screenshot_url = self._get_screenshot_url(screenshot)
+        if screenshot_url is None:
+            screenshot_bytes = self._prepare_screenshot(screenshot)
+            upload_response = await client.put_s3_presigned_url(screenshot_bytes)
+            screenshot_url = upload_response.download_url
+        return screenshot_url
+
+    def _add_user_message_to_history(
+        self, screenshot_url: str, prompt: str | None = None
+    ):
+        """Add user message with screenshot to message history.
+
+        Args:
+            screenshot_url: URL of the screenshot
+            prompt: Optional prompt text (for first message only)
+        """
+        content = []
+        if prompt:
+            content.append({"type": "text", "text": prompt})
+        content.append({"type": "image_url", "image_url": {"url": screenshot_url}})
+
+        self.message_history.append(
+            {
+                "role": "user",
+                "content": content,
+            }
+        )
+
+    def _add_assistant_message_to_history(self, raw_output: str):
+        """Add assistant response to message history.
+
+        Args:
+            raw_output: Raw model output string
+        """
+        if raw_output:
+            self.message_history.append(
+                {
+                    "role": "assistant",
+                    "content": raw_output,
+                }
+            )
+
+    def _build_step_prompt(self) -> str | None:
+        """Build prompt for first message only."""
+        if len(self.message_history) == 0:
+            return build_prompt(self.task_description)
+        return None
+
+    def _log_step_completion(self, step: Step, prefix: str = "") -> None:
+        """Log step completion status."""
+        if step.stop:
+            logger.info(f"{prefix}Task completed.")
+        else:
+            logger.debug(f"{prefix}Step completed with {len(step.actions)} actions")
+
+    def _log_step_execution(self, prefix: str = ""):
+        logger.debug(f"Executing {prefix}step for task: '{self.task_description}'")
+
+    def _handle_step_error(self, error: Exception, prefix: str = ""):
+        logger.error(f"Error during {prefix}step execution: {error}")
+        raise
+
+
+class BaseAutoMode:
+    """Base class with shared auto_mode logic for ShortTask implementations."""
+
+    def _log_auto_mode_start(self, task_desc: str, max_steps: int, prefix: str = ""):
+        logger.info(
+            f"Starting {prefix}auto mode for task: '{task_desc}' (max_steps: {max_steps})"
+        )
+
+    def _log_auto_mode_step(self, step_num: int, max_steps: int, prefix: str = ""):
+        logger.debug(f"{prefix.capitalize()}auto mode step {step_num}/{max_steps}")
+
+    def _log_auto_mode_actions(self, action_count: int, prefix: str = ""):
+        verb = "asynchronously" if "async" in prefix else ""
+        logger.debug(f"Executing {action_count} actions {verb}".strip())
+
+    def _log_auto_mode_completion(self, steps: int, prefix: str = ""):
+        logger.info(
+            f"{prefix.capitalize()}auto mode completed successfully after {steps} steps"
+        )
+
+    def _log_auto_mode_max_steps(self, max_steps: int, prefix: str = ""):
+        logger.warning(
+            f"{prefix.capitalize()}auto mode reached max steps ({max_steps}) without completion"
+        )

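A small illustration of the OpenAI-compatible message history that BaseActor builds across steps (shapes taken from _add_user_message_to_history, _add_assistant_message_to_history, and _build_step_prompt above; the URLs and output strings are placeholders):

```python
# First user turn carries the built prompt plus the screenshot URL; later user
# turns carry only the screenshot, because _build_step_prompt() returns None
# once message_history is non-empty.
message_history = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "<prompt from build_prompt(task_description)>"},
            {"type": "image_url", "image_url": {"url": "https://example.com/step-1.png"}},
        ],
    },
    {"role": "assistant", "content": "<raw model output from chat_completion>"},
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "https://example.com/step-2.png"}},
        ],
    },
]
```
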
oagi/{task → actor}/short.py
RENAMED
oagi/{task → actor}/sync.py
RENAMED
@@ -10,9 +10,12 @@ import warnings
 
 from ..client import SyncClient
 from ..constants import DEFAULT_MAX_STEPS, MODEL_ACTOR
+from ..logging import get_logger
 from ..types import URL, Image, Step
 from .base import BaseActor
 
+logger = get_logger("actor.sync")
+
 
 class Actor(BaseActor):
     """Base class for task automation with the OAGI API."""
@@ -51,18 +54,31 @@
         """Send screenshot to the server and get the next actions.
 
         Args:
-            screenshot: Screenshot as Image object or raw bytes
-            instruction: Optional additional instruction for this step
+            screenshot: Screenshot as Image object, URL string, or raw bytes
+            instruction: Optional additional instruction for this step (currently unused)
            temperature: Sampling temperature for this step (overrides task default if provided)
 
         Returns:
             Step: The actions and reasoning for this step
         """
-
+        self._validate_and_increment_step()
+        self._log_step_execution()
 
         try:
-
-
+            screenshot_url = self._ensure_screenshot_url_sync(screenshot, self.client)
+            self._add_user_message_to_history(screenshot_url, self._build_step_prompt())
+
+            step, raw_output, usage = self.client.chat_completion(
+                model=self.model,
+                messages=self.message_history,
+                temperature=self._get_temperature(temperature),
+                task_id=self.task_id,
+            )
+
+            self._add_assistant_message_to_history(raw_output)
+            self._log_step_completion(step)
+            return step
+
         except Exception as e:
             self._handle_step_error(e)
 
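Per the updated docstring, the method in this hunk now accepts an Image object, a URL string, or raw bytes; URL strings are passed through unchanged, while other inputs are uploaded via put_s3_presigned_url (see _ensure_screenshot_url_sync in base.py above). A hedged usage sketch, assuming an already-initialized Actor instance named actor and assuming the method shown here is named step(), which this hunk does not show:

```python
# URL strings skip the S3 upload; bytes (and Image objects) are uploaded first.
step = actor.step("https://example.com/screen.png")  # URL passthrough
with open("screen.png", "rb") as f:
    step = actor.step(f.read())  # raw bytes -> presigned S3 upload

if step.stop:
    print("Task completed:", step.reason)
else:
    for action in step.actions:  # Step exposes .actions, .reason, .stop
        ...  # execute each action against the environment
```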
|
oagi/agent/default.py
CHANGED
oagi/agent/observer/exporters.py
CHANGED
@@ -98,6 +98,8 @@ def export_to_markdown(
         case StepEvent():
             lines.append(f"\n## Step {event.step_num}\n")
             lines.append(f"**Time:** {timestamp}\n")
+            if event.task_id:
+                lines.append(f"**Task ID:** `{event.task_id}`\n")
 
             if isinstance(event.image, bytes):
                 if images_dir:
@@ -159,6 +161,8 @@ def export_to_markdown(
             }
             phase_title = phase_titles.get(event.phase, event.phase.capitalize())
             lines.append(f"\n### {phase_title} ({timestamp})\n")
+            if event.request_id:
+                lines.append(f"**Request ID:** `{event.request_id}`\n")
 
             if event.image:
                 if isinstance(event.image, bytes):
@@ -227,6 +231,7 @@ def _convert_events_for_html(events: list[ObserverEvent]) -> list[dict]:
                 "reason": event.step.reason,
                 "actions": actions_list,
                 "stop": event.step.stop,
+                "task_id": event.task_id,
             }
         )
 
@@ -275,6 +280,7 @@ def _convert_events_for_html(events: list[ObserverEvent]) -> list[dict]:
                 "image": image_data,
                 "reasoning": event.reasoning,
                 "result": event.result,
+                "request_id": event.request_id,
             }
         )
 

oagi/agent/observer/report_template.html
CHANGED

@@ -46,6 +46,19 @@
             font-size: 0.9em;
         }
 
+        .task-id, .request-id {
+            color: #666;
+            font-size: 0.9em;
+            margin-left: 10px;
+        }
+
+        .task-id code, .request-id code {
+            background: #e9ecef;
+            padding: 2px 6px;
+            border-radius: 3px;
+            font-family: monospace;
+        }
+
         .screenshot-container {
             position: relative;
             display: inline-block;
@@ -339,6 +352,9 @@
                 html += '<div class="step">';
                 html += `<h2>Step ${event.step_num}</h2>`;
                 html += `<span class="timestamp">${timestamp}</span>`;
+                if (event.task_id) {
+                    html += ` <span class="task-id">Task ID: <code>${event.task_id}</code></span>`;
+                }
 
                 if (event.image) {
                     const actionsJson = JSON.stringify(event.action_coords || []).replace(/"/g, '&quot;');
@@ -409,6 +425,9 @@
                 html += '<div class="plan">';
                 html += `<h3>${phaseTitle}</h3>`;
                 html += `<span class="timestamp">${timestamp}</span>`;
+                if (event.request_id) {
+                    html += ` <span class="request-id">Request ID: <code>${event.request_id}</code></span>`;
+                }
 
                 if (event.image) {
                     html += '<div class="screenshot-container">';

oagi/agent/tasker/planner.py
CHANGED
@@ -122,7 +122,7 @@ class Planner:
         screenshot: Image | URL | None = None,
         memory: PlannerMemory | None = None,
         todo_index: int | None = None,
-    ) -> PlannerOutput:
+    ) -> tuple[PlannerOutput, str | None]:
         """Generate initial plan for a todo.
 
         Args:
@@ -133,7 +133,7 @@
             todo_index: Optional todo index for formatting internal context
 
         Returns:
-
+            Tuple of (PlannerOutput, request_id) where request_id is from API response
         """
         # Ensure we have a client
         client = self._ensure_client()
@@ -170,8 +170,8 @@
             current_screenshot=screenshot_uuid,
         )
 
-        # Parse response
-        return self._parse_planner_output(response.response)
+        # Parse response and return with request_id
+        return self._parse_planner_output(response.response), response.request_id
 
     async def reflect(
         self,
@@ -182,7 +182,7 @@
         todo_index: int | None = None,
         current_instruction: str | None = None,
         reflection_interval: int = DEFAULT_REFLECTION_INTERVAL,
-    ) -> ReflectionOutput:
+    ) -> tuple[ReflectionOutput, str | None]:
         """Reflect on recent actions and progress.
 
         Args:
@@ -195,7 +195,7 @@
             reflection_interval: Window size for recent actions/screenshots
 
         Returns:
-            ReflectionOutput
+            Tuple of (ReflectionOutput, request_id) where request_id is from API response
         """
         # Ensure we have a client
         client = self._ensure_client()
@@ -260,8 +260,8 @@
             prior_notes=prior_notes,
         )
 
-        # Parse response
-        return self._parse_reflection_output(response.response)
+        # Parse response and return with request_id
+        return self._parse_reflection_output(response.response), response.request_id
 
     async def summarize(
         self,
@@ -269,7 +269,7 @@
         context: dict[str, Any],
         memory: PlannerMemory | None = None,
         todo_index: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate execution summary.
 
         Args:
@@ -279,7 +279,7 @@
             todo_index: Optional todo index for formatting internal context
 
         Returns:
-
+            Tuple of (summary string, request_id) where request_id is from API response
         """
         # Ensure we have a client
         client = self._ensure_client()
@@ -314,9 +314,11 @@
         # Parse response and extract summary
         try:
             result = json.loads(response.response)
-
+            summary = result.get("task_summary", response.response)
         except json.JSONDecodeError:
-
+            summary = response.response
+
+        return summary, response.request_id
 
     def _format_execution_notes(self, context: dict[str, Any]) -> str:
         """Format execution history notes.

oagi/agent/tasker/taskee_agent.py
CHANGED

@@ -200,7 +200,7 @@ class TaskeeAgent(AsyncAgent):
         context = self._get_context()
 
         # Generate plan using LLM planner
-        plan_output = await self.planner.initial_plan(
+        plan_output, request_id = await self.planner.initial_plan(
             self.current_todo,
             context,
             screenshot,
@@ -224,6 +224,7 @@ class TaskeeAgent(AsyncAgent):
                 image=_serialize_image(screenshot),
                 reasoning=plan_output.reasoning,
                 result=plan_output.instruction,
+                request_id=request_id,
             )
         )
 
@@ -309,6 +310,7 @@ class TaskeeAgent(AsyncAgent):
                 step_num=self.total_actions + 1,
                 image=_serialize_image(screenshot),
                 step=step,
+                task_id=self.actor.task_id,
             )
         )
 
@@ -393,7 +395,7 @@ class TaskeeAgent(AsyncAgent):
         recent_actions = self.actions[-self.since_reflection :]
 
         # Reflect using planner
-        reflection = await self.planner.reflect(
+        reflection, request_id = await self.planner.reflect(
             recent_actions,
             context,
             screenshot,
@@ -424,6 +426,7 @@ class TaskeeAgent(AsyncAgent):
                 image=_serialize_image(screenshot),
                 reasoning=reflection.reasoning,
                 result=decision,
+                request_id=request_id,
             )
         )
 
@@ -456,7 +459,7 @@ class TaskeeAgent(AsyncAgent):
         context = self._get_context()
         context["current_todo"] = self.current_todo
 
-        summary = await self.planner.summarize(
+        summary, request_id = await self.planner.summarize(
             self.actions,
             context,
             memory=self.external_memory,
@@ -478,6 +481,7 @@ class TaskeeAgent(AsyncAgent):
                 image=None,
                 reasoning=summary,
                 result=None,
+                request_id=request_id,
             )
         )
 