PyPI - oagi-core - Versions diffs - 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

oagi/__init__.py +76 -33
oagi/agent/__init__.py +2 -0
oagi/agent/default.py +45 -12
oagi/agent/factories.py +22 -3
oagi/agent/observer/__init__.py +38 -0
oagi/agent/observer/agent_observer.py +99 -0
oagi/agent/observer/events.py +28 -0
oagi/agent/observer/exporters.py +445 -0
oagi/agent/observer/protocol.py +12 -0
oagi/agent/registry.py +2 -2
oagi/agent/tasker/models.py +1 -0
oagi/agent/tasker/planner.py +41 -9
oagi/agent/tasker/taskee_agent.py +178 -86
oagi/agent/tasker/tasker_agent.py +25 -14
oagi/cli/agent.py +50 -9
oagi/cli/tracking.py +27 -17
oagi/cli/utils.py +11 -4
oagi/client/base.py +3 -7
oagi/handler/_macos.py +55 -0
oagi/handler/pyautogui_action_handler.py +19 -2
oagi/server/agent_wrappers.py +5 -5
oagi/server/config.py +3 -3
oagi/server/models.py +2 -2
oagi/server/session_store.py +2 -2
oagi/server/socketio_server.py +1 -1
oagi/task/async_.py +13 -34
oagi/task/async_short.py +2 -2
oagi/task/base.py +41 -7
oagi/task/short.py +2 -2
oagi/task/sync.py +11 -34
oagi/types/__init__.py +24 -4
oagi/types/async_image_provider.py +3 -2
oagi/types/image_provider.py +3 -2
oagi/types/step_observer.py +75 -16
oagi/types/url.py +3 -0
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/METADATA +38 -25
oagi_core-0.10.0.dist-info/RECORD +68 -0
oagi/types/url_image.py +0 -47
oagi_core-0.9.1.dist-info/RECORD +0 -62
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/WHEEL +0 -0
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/entry_points.txt +0 -0
{oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/licenses/LICENSE +0 -0

oagi/client/base.py CHANGED Viewed

@@ -41,16 +41,12 @@ class BaseClient(Generic[HttpClientT]):
     def __init__(self, base_url: str | None = None, api_key: str | None = None):
         # Get from environment if not provided
-        self.base_url = base_url or os.getenv("OAGI_BASE_URL")
+        self.base_url = (
+            base_url or os.getenv("OAGI_BASE_URL") or "https://api.agiopen.org"
+        )
         self.api_key = api_key or os.getenv("OAGI_API_KEY")
         # Validate required configuration
-        if not self.base_url:
-            raise ConfigurationError(
-                "OAGI base URL must be provided either as 'base_url' parameter or "
-                "OAGI_BASE_URL environment variable"
-            )
         if not self.api_key:
             raise ConfigurationError(
                 "OAGI API key must be provided either as 'api_key' parameter or "

oagi/handler/_macos.py ADDED Viewed

@@ -0,0 +1,55 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import pyautogui
+from ..exceptions import check_optional_dependency
+check_optional_dependency("Quartz", "macOS multiple clicks", "desktop")
+import Quartz  # noqa: E402
+def macos_click(x: int, y: int, clicks: int = 1) -> None:
+    """
+    Execute a mouse click sequence on macOS with correct click state.
+    This avoids the PyAutoGUI bug where multi-clicks are sent as separate
+    single clicks (clickState=1), which macOS interprets as distinct events
+    rather than double/triple clicks.
+    Check https://github.com/asweigart/pyautogui/issues/672
+    Args:
+        x: X coordinate
+        y: Y coordinate
+        clicks: Number of clicks (1=single, 2=double, 3=triple)
+    """
+    # Move to position first using pyautogui to ensure consistency
+    pyautogui.moveTo(x, y)
+    point = Quartz.CGPoint(x=x, y=y)
+    # Create and post events for each click in the sequence
+    for i in range(1, clicks + 1):
+        # Create Down/Up events
+        mouse_down = Quartz.CGEventCreateMouseEvent(
+            None, Quartz.kCGEventLeftMouseDown, point, Quartz.kCGMouseButtonLeft
+        )
+        mouse_up = Quartz.CGEventCreateMouseEvent(
+            None, Quartz.kCGEventLeftMouseUp, point, Quartz.kCGMouseButtonLeft
+        )
+        # Set the click state (1 for first click, 2 for second, etc.)
+        Quartz.CGEventSetIntegerValueField(
+            mouse_down, Quartz.kCGMouseEventClickState, i
+        )
+        Quartz.CGEventSetIntegerValueField(mouse_up, Quartz.kCGMouseEventClickState, i)
+        # Post events
+        Quartz.CGEventPost(Quartz.kCGHIDEventTap, mouse_down)
+        Quartz.CGEventPost(Quartz.kCGHIDEventTap, mouse_up)

oagi/handler/pyautogui_action_handler.py CHANGED Viewed

@@ -7,6 +7,7 @@
 # -----------------------------------------------------------------------------
 import re
+import sys
 import time
 from pydantic import BaseModel, Field
@@ -17,6 +18,9 @@ from ..types import Action, ActionType
 check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
 import pyautogui  # noqa: E402
+if sys.platform == "darwin":
+    from . import _macos
 class CapsLockManager:
     """Manages caps lock state for text transformation."""
@@ -76,6 +80,10 @@ class PyautoguiConfig(BaseModel):
         default="session",
         description="Caps lock handling mode: 'session' (internal state) or 'system' (OS-level)",
     )
+    macos_ctrl_to_cmd: bool = Field(
+        default=True,
+        description="Replace 'ctrl' with 'command' in hotkey combinations on macOS",
+    )
 class PyautoguiActionHandler:
@@ -165,6 +173,9 @@ class PyautoguiActionHandler:
         # Normalize caps lock variations
         if key in ["caps_lock", "caps", "capslock"]:
             return "capslock"
+        # Remap ctrl to command on macOS if enabled
+        if self.config.macos_ctrl_to_cmd and sys.platform == "darwin" and key == "ctrl":
+            return "command"
         return key
     def _parse_hotkey(self, args_str: str) -> list[str]:
@@ -186,11 +197,17 @@ class PyautoguiActionHandler:
             case ActionType.LEFT_DOUBLE:
                 x, y = self._parse_coords(arg)
-                pyautogui.doubleClick(x, y)
+                if sys.platform == "darwin":
+                    _macos.macos_click(x, y, clicks=2)
+                else:
+                    pyautogui.doubleClick(x, y)
             case ActionType.LEFT_TRIPLE:
                 x, y = self._parse_coords(arg)
-                pyautogui.tripleClick(x, y)
+                if sys.platform == "darwin":
+                    _macos.macos_click(x, y, clicks=3)
+                else:
+                    pyautogui.tripleClick(x, y)
             case ActionType.RIGHT_SINGLE:
                 x, y = self._parse_coords(arg)

oagi/server/agent_wrappers.py CHANGED Viewed

@@ -9,7 +9,7 @@
 import logging
 from typing import TYPE_CHECKING
-from ..types import URLImage
+from ..types import URL
 from ..types.models.action import Action
 from .models import ScreenshotRequestData, ScreenshotResponseData
@@ -56,7 +56,7 @@ class SocketIOImageProvider:
         self.oagi_client = oagi_client
         self._last_url: str | None = None
-    async def __call__(self) -> URLImage:
+    async def __call__(self) -> URL:
         logger.debug("Requesting screenshot via Socket.IO")
         # Get S3 presigned URL from OAGI
@@ -87,12 +87,12 @@ class SocketIOImageProvider:
         self.session.current_screenshot_url = upload_response.download_url
         logger.debug(f"Screenshot captured successfully: {upload_response.uuid}")
-        return URLImage(upload_response.download_url)
+        return URL(upload_response.download_url)
-    async def last_image(self) -> URLImage:
+    async def last_image(self) -> URL:
         if self._last_url:
             logger.debug("Returning last captured screenshot")
-            return URLImage(self._last_url)
+            return URL(self._last_url)
         logger.debug("No previous screenshot, capturing new one")
         return await self()

oagi/server/config.py CHANGED Viewed

@@ -28,11 +28,11 @@ class ServerConfig(BaseSettings):
     session_timeout_seconds: float = Field(default=10.0)
     # Model settings
-    default_model: str = Field(default="lux-v1", alias="OAGI_DEFAULT_MODEL")
-    default_temperature: float = Field(default=0.0, ge=0.0, le=2.0)
+    default_model: str = Field(default="lux-actor-1", alias="OAGI_DEFAULT_MODEL")
+    default_temperature: float = Field(default=0.5, ge=0.0, le=2.0)
     # Agent settings
-    max_steps: int = Field(default=30, alias="OAGI_MAX_STEPS", ge=1, le=100)
+    max_steps: int = Field(default=20, alias="OAGI_MAX_STEPS", ge=1, le=100)
     # Socket.IO settings
     socketio_path: str = Field(default="/socket.io")

oagi/server/models.py CHANGED Viewed

@@ -15,7 +15,7 @@ from pydantic import BaseModel, Field
 class InitEventData(BaseModel):
     instruction: str = Field(...)
     mode: str | None = Field(default="actor")
-    model: str | None = Field(default="lux-v1")
+    model: str | None = Field(default="lux-actor-1")
     temperature: float | None = Field(default=0.1, ge=0.0, le=2.0)
@@ -75,7 +75,7 @@ class ScreenshotResponseData(BaseModel):
 # Action acknowledgement
 class ActionAckData(BaseModel):
-    action_index: int = Field(...)
+    index: int = Field(...)
     success: bool = Field(...)
     error: str | None = Field(None)
     execution_time_ms: int | None = Field(None)

oagi/server/session_store.py CHANGED Viewed

@@ -18,7 +18,7 @@ class Session:
         session_id: str,
         instruction: str,
         mode: str = "actor",
-        model: str = "lux-v1",
+        model: str = "lux-actor-1",
         temperature: float = 0.0,
     ):
         self.session_id: str = session_id
@@ -54,7 +54,7 @@ class SessionStore:
         self,
         instruction: str,
         mode: str = "actor",
-        model: str = "lux-v1",
+        model: str = "lux-actor-1",
         temperature: float = 0.0,
         session_id: str | None = None,
     ) -> str:

oagi/server/socketio_server.py CHANGED Viewed

@@ -224,7 +224,7 @@ class SessionNamespace(socketio.AsyncNamespace):
                 # Emit finish event
                 await self.call(
                     "finish",
-                    FinishEventData(action_index=0, total_actions=1).model_dump(),
+                    FinishEventData(index=0, total=1).model_dump(),
                     to=session.socket_id,
                     timeout=self.config.socketio_timeout,
                 )

oagi/task/async_.py CHANGED Viewed

@@ -9,21 +9,18 @@
 import warnings
 from ..client import AsyncClient
-from ..logging import get_logger
-from ..types import Image, Step
-from .base import BaseTask
+from ..types import URL, Image, Step
+from .base import BaseActor
-logger = get_logger("async_task")
-class AsyncActor(BaseTask):
+class AsyncActor(BaseActor):
     """Async base class for task automation with the OAGI API."""
     def __init__(
         self,
         api_key: str | None = None,
         base_url: str | None = None,
-        model: str = "vision-model-v1",
+        model: str = "lux-actor-1",
         temperature: float | None = None,
     ):
         super().__init__(api_key, base_url, model, temperature)
@@ -34,19 +31,19 @@ class AsyncActor(BaseTask):
     async def init_task(
         self,
         task_desc: str,
-        max_steps: int = 5,
+        max_steps: int = 20,
     ):
         """Initialize a new task with the given description.
         Args:
             task_desc: Task description
-            max_steps: Maximum number of steps (for logging)
+            max_steps: Maximum number of steps allowed
         """
         self._prepare_init_task(task_desc, max_steps)
     async def step(
         self,
-        screenshot: Image | bytes,
+        screenshot: Image | URL | bytes,
         instruction: str | None = None,
         temperature: float | None = None,
     ) -> Step:
@@ -60,33 +57,15 @@ class AsyncActor(BaseTask):
         Returns:
             Step: The actions and reasoning for this step
         """
-        self._validate_step_preconditions()
-        self._log_step_execution(prefix="async ")
+        kwargs = self._prepare_step(
+            screenshot, instruction, temperature, prefix="async "
+        )
         try:
-            # Use provided temperature or fall back to task default
-            temp = self._get_temperature(temperature)
-            # Prepare screenshot kwargs (handles URLImage vs bytes/Image)
-            screenshot_kwargs = self._prepare_screenshot_kwargs(screenshot)
-            # Call API with dynamically determined screenshot argument
-            response = await self.client.create_message(
-                model=self.model,
-                task_description=self.task_description,
-                task_id=self.task_id,
-                instruction=instruction,
-                messages_history=self.message_history,
-                temperature=temp,
-                **screenshot_kwargs,
-            )
-            # Convert API response to Step (also updates message_history)
+            response = await self.client.create_message(**kwargs)
             return self._build_step_response(response, prefix="Async ")
         except Exception as e:
-            logger.error(f"Error during async step execution: {e}")
-            raise
+            self._handle_step_error(e, prefix="async ")
     async def close(self):
         """Close the underlying HTTP client to free resources."""
@@ -110,7 +89,7 @@ class AsyncTask(AsyncActor):
         self,
         api_key: str | None = None,
         base_url: str | None = None,
-        model: str = "vision-model-v1",
+        model: str = "lux-actor-1",
         temperature: float | None = None,
     ):
         warnings.warn(

oagi/task/async_short.py CHANGED Viewed

@@ -27,7 +27,7 @@ class AsyncShortTask(AsyncActor, BaseAutoMode):
         self,
         api_key: str | None = None,
         base_url: str | None = None,
-        model: str = "vision-model-v1",
+        model: str = "lux-actor-1",
         temperature: float | None = None,
     ):
         warnings.warn(
@@ -43,7 +43,7 @@ class AsyncShortTask(AsyncActor, BaseAutoMode):
     async def auto_mode(
         self,
         task_desc: str,
-        max_steps: int = 5,
+        max_steps: int = 20,
         executor: AsyncActionHandler = None,
         image_provider: AsyncImageProvider = None,
         temperature: float | None = None,

oagi/task/base.py CHANGED Viewed

@@ -9,14 +9,14 @@
 from uuid import uuid4
 from ..logging import get_logger
-from ..types import Image, Step, URLImage
+from ..types import URL, Image, Step
 from ..types.models import LLMResponse
 logger = get_logger("task.base")
-class BaseTask:
-    """Base class with shared task management logic for sync/async tasks."""
+class BaseActor:
+    """Base class with shared task management logic for sync/async actors."""
     def __init__(
         self,
@@ -30,6 +30,8 @@ class BaseTask:
         self.model = model
         self.temperature = temperature
         self.message_history: list = []  # OpenAI-compatible message history
+        self.max_steps: int = 20  # Maximum steps allowed
+        self.current_step: int = 0  # Current step counter
         # Client will be set by subclasses
         self.api_key: str | None = None
         self.base_url: str | None = None
@@ -48,11 +50,43 @@ class BaseTask:
         self.task_id = uuid4().hex
         self.task_description = task_desc
         self.message_history = []
+        self.max_steps = max_steps
+        self.current_step = 0
         logger.info(f"Task initialized: '{task_desc}' (max_steps: {max_steps})")
-    def _validate_step_preconditions(self):
+    def _validate_and_increment_step(self):
         if not self.task_description:
             raise ValueError("Task description must be set. Call init_task() first.")
+        if self.current_step >= self.max_steps:
+            raise ValueError(
+                f"Max steps limit ({self.max_steps}) reached. "
+                "Call init_task() to start a new task."
+            )
+        self.current_step += 1
+    def _prepare_step(
+        self,
+        screenshot: Image | URL | bytes,
+        instruction: str | None,
+        temperature: float | None,
+        prefix: str = "",
+    ) -> dict:
+        self._validate_and_increment_step()
+        self._log_step_execution(prefix=prefix)
+        return {
+            "model": self.model,
+            "task_description": self.task_description,
+            "task_id": self.task_id,
+            "instruction": instruction,
+            "messages_history": self.message_history,
+            "temperature": self._get_temperature(temperature),
+            **self._prepare_screenshot_kwargs(screenshot),
+        }
+    def _handle_step_error(self, error: Exception, prefix: str = ""):
+        logger.error(f"Error during {prefix}step execution: {error}")
+        raise
     def _prepare_screenshot(self, screenshot: Image | bytes) -> bytes:
         if isinstance(screenshot, Image):
@@ -62,9 +96,9 @@ class BaseTask:
     def _get_temperature(self, temperature: float | None) -> float | None:
         return temperature if temperature is not None else self.temperature
-    def _prepare_screenshot_kwargs(self, screenshot: Image | bytes) -> dict:
-        if isinstance(screenshot, URLImage):
-            return {"screenshot_url": screenshot.get_url()}
+    def _prepare_screenshot_kwargs(self, screenshot: Image | URL | bytes) -> dict:
+        if isinstance(screenshot, str):
+            return {"screenshot_url": screenshot}
         return {"screenshot": self._prepare_screenshot(screenshot)}
     def _handle_response_message_history(self, response: LLMResponse):

oagi/task/short.py CHANGED Viewed

@@ -27,7 +27,7 @@ class ShortTask(Actor, BaseAutoMode):
         self,
         api_key: str | None = None,
         base_url: str | None = None,
-        model: str = "vision-model-v1",
+        model: str = "lux-actor-1",
         temperature: float | None = None,
     ):
         warnings.warn(
@@ -43,7 +43,7 @@ class ShortTask(Actor, BaseAutoMode):
     def auto_mode(
         self,
         task_desc: str,
-        max_steps: int = 5,
+        max_steps: int = 20,
         executor: ActionHandler = None,
         image_provider: ImageProvider = None,
         temperature: float | None = None,

oagi/task/sync.py CHANGED Viewed

@@ -9,21 +9,18 @@
 import warnings
 from ..client import SyncClient
-from ..logging import get_logger
-from ..types import Image, Step
-from .base import BaseTask
+from ..types import URL, Image, Step
+from .base import BaseActor
-logger = get_logger("task")
-class Actor(BaseTask):
+class Actor(BaseActor):
     """Base class for task automation with the OAGI API."""
     def __init__(
         self,
         api_key: str | None = None,
         base_url: str | None = None,
-        model: str = "vision-model-v1",
+        model: str = "lux-actor-1",
         temperature: float | None = None,
     ):
         super().__init__(api_key, base_url, model, temperature)
@@ -34,19 +31,19 @@ class Actor(BaseTask):
     def init_task(
         self,
         task_desc: str,
-        max_steps: int = 5,
+        max_steps: int = 20,
     ):
         """Initialize a new task with the given description.
         Args:
             task_desc: Task description
-            max_steps: Maximum number of steps (for logging)
+            max_steps: Maximum number of steps allowed
         """
         self._prepare_init_task(task_desc, max_steps)
     def step(
         self,
-        screenshot: Image | bytes,
+        screenshot: Image | URL | bytes,
         instruction: str | None = None,
         temperature: float | None = None,
     ) -> Step:
@@ -60,33 +57,13 @@ class Actor(BaseTask):
         Returns:
             Step: The actions and reasoning for this step
         """
-        self._validate_step_preconditions()
-        self._log_step_execution()
+        kwargs = self._prepare_step(screenshot, instruction, temperature)
         try:
-            # Use provided temperature or fall back to task default
-            temp = self._get_temperature(temperature)
-            # Prepare screenshot kwargs (handles URLImage vs bytes/Image)
-            screenshot_kwargs = self._prepare_screenshot_kwargs(screenshot)
-            # Call API with dynamically determined screenshot argument
-            response = self.client.create_message(
-                model=self.model,
-                task_description=self.task_description,
-                task_id=self.task_id,
-                instruction=instruction,
-                messages_history=self.message_history,
-                temperature=temp,
-                **screenshot_kwargs,
-            )
-            # Convert API response to Step (also updates message_history)
+            response = self.client.create_message(**kwargs)
             return self._build_step_response(response)
         except Exception as e:
-            logger.error(f"Error during step execution: {e}")
-            raise
+            self._handle_step_error(e)
     def close(self):
         """Close the underlying HTTP client to free resources."""
@@ -110,7 +87,7 @@ class Task(Actor):
         self,
         api_key: str | None = None,
         base_url: str | None = None,
-        model: str = "vision-model-v1",
+        model: str = "lux-actor-1",
         temperature: float | None = None,
     ):
         warnings.warn(

oagi/types/__init__.py CHANGED Viewed

@@ -12,19 +12,39 @@ from .async_image_provider import AsyncImageProvider
 from .image import Image
 from .image_provider import ImageProvider
 from .models import Action, ActionType, ImageConfig, Step
-from .step_observer import AsyncStepObserver
-from .url_image import URLImage
+from .step_observer import (
+    ActionEvent,
+    AsyncObserver,
+    AsyncStepObserver,
+    BaseEvent,
+    ImageEvent,
+    LogEvent,
+    ObserverEvent,
+    PlanEvent,
+    SplitEvent,
+    StepEvent,
+)
+from .url import URL
 __all__ = [
     "Action",
+    "ActionEvent",
     "ActionType",
+    "AsyncObserver",
+    "AsyncStepObserver",
+    "BaseEvent",
     "Image",
     "ImageConfig",
+    "ImageEvent",
+    "LogEvent",
+    "ObserverEvent",
+    "PlanEvent",
+    "SplitEvent",
     "Step",
+    "StepEvent",
     "ActionHandler",
     "AsyncActionHandler",
     "ImageProvider",
     "AsyncImageProvider",
-    "AsyncStepObserver",
-    "URLImage",
+    "URL",
 ]

oagi/types/async_image_provider.py CHANGED Viewed

@@ -9,10 +9,11 @@
 from typing import Protocol
 from .image import Image
+from .url import URL
 class AsyncImageProvider(Protocol):
-    async def __call__(self) -> Image:
+    async def __call__(self) -> Image | URL:
         """
         Asynchronously provides an image.
@@ -28,7 +29,7 @@ class AsyncImageProvider(Protocol):
             RuntimeError: If an error occurs during image capture or generation.
         """
-    async def last_image(self) -> Image:
+    async def last_image(self) -> Image | URL:
         """
         Asynchronously returns the last captured image.

oagi/types/image_provider.py CHANGED Viewed

@@ -9,10 +9,11 @@
 from typing import Protocol
 from .image import Image
+from .url import URL
 class ImageProvider(Protocol):
-    def __call__(self) -> Image:
+    def __call__(self) -> Image | URL:
         """
         Represents the functionality to invoke the callable object and produce an Image
         result. Typically used to process or generate images using the defined logic
@@ -22,7 +23,7 @@ class ImageProvider(Protocol):
             Image: The resulting image output from the callable logic.
         """
-    def last_image(self) -> Image:
+    def last_image(self) -> Image | URL:
         """
         Returns the last captured image.

oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl