PyPI - oagi-core - Versions diffs - 0.10.1__py3-none-any.whl - Mend

oagi-core 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

oagi/__init__.py +148 -0
oagi/agent/__init__.py +33 -0
oagi/agent/default.py +124 -0
oagi/agent/factories.py +74 -0
oagi/agent/observer/__init__.py +38 -0
oagi/agent/observer/agent_observer.py +99 -0
oagi/agent/observer/events.py +28 -0
oagi/agent/observer/exporters.py +445 -0
oagi/agent/observer/protocol.py +12 -0
oagi/agent/protocol.py +55 -0
oagi/agent/registry.py +155 -0
oagi/agent/tasker/__init__.py +33 -0
oagi/agent/tasker/memory.py +160 -0
oagi/agent/tasker/models.py +77 -0
oagi/agent/tasker/planner.py +408 -0
oagi/agent/tasker/taskee_agent.py +512 -0
oagi/agent/tasker/tasker_agent.py +324 -0
oagi/cli/__init__.py +11 -0
oagi/cli/agent.py +281 -0
oagi/cli/display.py +56 -0
oagi/cli/main.py +77 -0
oagi/cli/server.py +94 -0
oagi/cli/tracking.py +55 -0
oagi/cli/utils.py +89 -0
oagi/client/__init__.py +12 -0
oagi/client/async_.py +290 -0
oagi/client/base.py +457 -0
oagi/client/sync.py +293 -0
oagi/exceptions.py +118 -0
oagi/handler/__init__.py +24 -0
oagi/handler/_macos.py +55 -0
oagi/handler/async_pyautogui_action_handler.py +44 -0
oagi/handler/async_screenshot_maker.py +47 -0
oagi/handler/pil_image.py +102 -0
oagi/handler/pyautogui_action_handler.py +291 -0
oagi/handler/screenshot_maker.py +41 -0
oagi/logging.py +55 -0
oagi/server/__init__.py +13 -0
oagi/server/agent_wrappers.py +98 -0
oagi/server/config.py +46 -0
oagi/server/main.py +157 -0
oagi/server/models.py +98 -0
oagi/server/session_store.py +116 -0
oagi/server/socketio_server.py +405 -0
oagi/task/__init__.py +21 -0
oagi/task/async_.py +101 -0
oagi/task/async_short.py +76 -0
oagi/task/base.py +157 -0
oagi/task/short.py +76 -0
oagi/task/sync.py +99 -0
oagi/types/__init__.py +50 -0
oagi/types/action_handler.py +30 -0
oagi/types/async_action_handler.py +30 -0
oagi/types/async_image_provider.py +38 -0
oagi/types/image.py +17 -0
oagi/types/image_provider.py +35 -0
oagi/types/models/__init__.py +32 -0
oagi/types/models/action.py +33 -0
oagi/types/models/client.py +68 -0
oagi/types/models/image_config.py +47 -0
oagi/types/models/step.py +17 -0
oagi/types/step_observer.py +93 -0
oagi/types/url.py +3 -0
oagi_core-0.10.1.dist-info/METADATA +245 -0
oagi_core-0.10.1.dist-info/RECORD +68 -0
oagi_core-0.10.1.dist-info/WHEEL +4 -0
oagi_core-0.10.1.dist-info/entry_points.txt +2 -0
oagi_core-0.10.1.dist-info/licenses/LICENSE +21 -0

oagi/handler/pyautogui_action_handler.py ADDED Viewed

@@ -0,0 +1,291 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import re
+import sys
+import time
+from pydantic import BaseModel, Field
+from ..exceptions import check_optional_dependency
+from ..types import Action, ActionType
+check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
+import pyautogui  # noqa: E402
+if sys.platform == "darwin":
+    from . import _macos
+class CapsLockManager:
+    """Manages caps lock state for text transformation."""
+    def __init__(self, mode: str = "session"):
+        """Initialize caps lock manager.
+        Args:
+            mode: Either "session" (internal state) or "system" (OS-level)
+        """
+        self.mode = mode
+        self.caps_enabled = False
+    def toggle(self):
+        """Toggle caps lock state in session mode."""
+        if self.mode == "session":
+            self.caps_enabled = not self.caps_enabled
+    def transform_text(self, text: str) -> str:
+        """Transform text based on caps lock state.
+        Args:
+            text: Input text to transform
+        Returns:
+            Transformed text (uppercase if caps enabled in session mode)
+        """
+        if self.mode == "session" and self.caps_enabled:
+            # Transform letters to uppercase, preserve special characters
+            return "".join(c.upper() if c.isalpha() else c for c in text)
+        return text
+    def should_use_system_capslock(self) -> bool:
+        """Check if system-level caps lock should be used."""
+        return self.mode == "system"
+class PyautoguiConfig(BaseModel):
+    """Configuration for PyautoguiActionHandler."""
+    drag_duration: float = Field(
+        default=0.5, description="Duration for drag operations in seconds"
+    )
+    scroll_amount: int = Field(
+        default=30, description="Amount to scroll (positive for up, negative for down)"
+    )
+    wait_duration: float = Field(
+        default=1.0, description="Duration for wait actions in seconds"
+    )
+    action_pause: float = Field(
+        default=0.1, description="Pause between PyAutoGUI actions in seconds"
+    )
+    hotkey_interval: float = Field(
+        default=0.1, description="Interval between key presses in hotkey combinations"
+    )
+    capslock_mode: str = Field(
+        default="session",
+        description="Caps lock handling mode: 'session' (internal state) or 'system' (OS-level)",
+    )
+    macos_ctrl_to_cmd: bool = Field(
+        default=True,
+        description="Replace 'ctrl' with 'command' in hotkey combinations on macOS",
+    )
+class PyautoguiActionHandler:
+    """
+    Handles actions to be executed using PyAutoGUI.
+    This class provides functionality for handling and executing a sequence of
+    actions using the PyAutoGUI library. It processes a list of actions and executes
+    them as per the implementation.
+    Methods:
+        __call__: Executes the provided list of actions.
+    Args:
+        actions (list[Action]): List of actions to be processed and executed.
+    """
+    def __init__(self, config: PyautoguiConfig | None = None):
+        # Use default config if none provided
+        self.config = config or PyautoguiConfig()
+        # Get screen dimensions for coordinate denormalization
+        self.screen_width, self.screen_height = pyautogui.size()
+        # Set default delay between actions
+        pyautogui.PAUSE = self.config.action_pause
+        # Initialize caps lock manager
+        self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
+    def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
+        """Convert coordinates from 0-1000 range to actual screen coordinates.
+        Also handles corner coordinates to prevent PyAutoGUI fail-safe trigger.
+        Corner coordinates (0,0), (0,max), (max,0), (max,max) are offset by 1 pixel.
+        """
+        screen_x = int(x * self.screen_width / 1000)
+        screen_y = int(y * self.screen_height / 1000)
+        # Prevent fail-safe by adjusting corner coordinates
+        # Check if coordinates are at screen corners (with small tolerance)
+        if screen_x < 1:
+            screen_x = 1
+        elif screen_x > self.screen_width - 1:
+            screen_x = self.screen_width - 1
+        if screen_y < 1:
+            screen_y = 1
+        elif screen_y > self.screen_height - 1:
+            screen_y = self.screen_height - 1
+        return screen_x, screen_y
+    def _parse_coords(self, args_str: str) -> tuple[int, int]:
+        """Extract x, y coordinates from argument string."""
+        match = re.match(r"(\d+),\s*(\d+)", args_str)
+        if not match:
+            raise ValueError(f"Invalid coordinates format: {args_str}")
+        x, y = int(match.group(1)), int(match.group(2))
+        return self._denormalize_coords(x, y)
+    def _parse_drag_coords(self, args_str: str) -> tuple[int, int, int, int]:
+        """Extract x1, y1, x2, y2 coordinates from drag argument string."""
+        match = re.match(r"(\d+),\s*(\d+),\s*(\d+),\s*(\d+)", args_str)
+        if not match:
+            raise ValueError(f"Invalid drag coordinates format: {args_str}")
+        x1, y1, x2, y2 = (
+            int(match.group(1)),
+            int(match.group(2)),
+            int(match.group(3)),
+            int(match.group(4)),
+        )
+        x1, y1 = self._denormalize_coords(x1, y1)
+        x2, y2 = self._denormalize_coords(x2, y2)
+        return x1, y1, x2, y2
+    def _parse_scroll(self, args_str: str) -> tuple[int, int, str]:
+        """Extract x, y, direction from scroll argument string."""
+        match = re.match(r"(\d+),\s*(\d+),\s*(\w+)", args_str)
+        if not match:
+            raise ValueError(f"Invalid scroll format: {args_str}")
+        x, y = int(match.group(1)), int(match.group(2))
+        x, y = self._denormalize_coords(x, y)
+        direction = match.group(3).lower()
+        return x, y, direction
+    def _normalize_key(self, key: str) -> str:
+        """Normalize key names for consistency."""
+        key = key.strip().lower()
+        # Normalize caps lock variations
+        hotkey_variations_mapping = {
+            "capslock": ["caps_lock", "caps", "capslock"],
+            "pgup": ["page_up", "pageup"],
+            "pgdn": ["page_down", "pagedown"],
+        }
+        for normalized, variations in hotkey_variations_mapping.items():
+            if key in variations:
+                return normalized
+        # Remap ctrl to command on macOS if enabled
+        if self.config.macos_ctrl_to_cmd and sys.platform == "darwin" and key == "ctrl":
+            return "command"
+        return key
+    def _parse_hotkey(self, args_str: str) -> list[str]:
+        """Parse hotkey string into list of keys."""
+        # Remove parentheses if present
+        args_str = args_str.strip("()")
+        # Split by '+' to get individual keys
+        keys = [self._normalize_key(key) for key in args_str.split("+")]
+        return keys
+    def _execute_single_action(self, action: Action) -> None:
+        """Execute a single action once."""
+        arg = action.argument.strip("()")  # Remove outer parentheses if present
+        match action.type:
+            case ActionType.CLICK:
+                x, y = self._parse_coords(arg)
+                pyautogui.click(x, y)
+            case ActionType.LEFT_DOUBLE:
+                x, y = self._parse_coords(arg)
+                if sys.platform == "darwin":
+                    _macos.macos_click(x, y, clicks=2)
+                else:
+                    pyautogui.doubleClick(x, y)
+            case ActionType.LEFT_TRIPLE:
+                x, y = self._parse_coords(arg)
+                if sys.platform == "darwin":
+                    _macos.macos_click(x, y, clicks=3)
+                else:
+                    pyautogui.tripleClick(x, y)
+            case ActionType.RIGHT_SINGLE:
+                x, y = self._parse_coords(arg)
+                pyautogui.rightClick(x, y)
+            case ActionType.DRAG:
+                x1, y1, x2, y2 = self._parse_drag_coords(arg)
+                pyautogui.moveTo(x1, y1)
+                pyautogui.dragTo(
+                    x2, y2, duration=self.config.drag_duration, button="left"
+                )
+            case ActionType.HOTKEY:
+                keys = self._parse_hotkey(arg)
+                # Check if this is a caps lock key press
+                if len(keys) == 1 and keys[0] == "capslock":
+                    if self.caps_manager.should_use_system_capslock():
+                        # System mode: use OS-level caps lock
+                        pyautogui.hotkey(
+                            "capslock", interval=self.config.hotkey_interval
+                        )
+                    else:
+                        # Session mode: toggle internal state
+                        self.caps_manager.toggle()
+                else:
+                    # Regular hotkey combination
+                    pyautogui.hotkey(*keys, interval=self.config.hotkey_interval)
+            case ActionType.TYPE:
+                # Remove quotes if present
+                text = arg.strip("\"'")
+                # Apply caps lock transformation if needed
+                text = self.caps_manager.transform_text(text)
+                pyautogui.typewrite(text)
+            case ActionType.SCROLL:
+                x, y, direction = self._parse_scroll(arg)
+                pyautogui.moveTo(x, y)
+                scroll_amount = (
+                    self.config.scroll_amount
+                    if direction == "up"
+                    else -self.config.scroll_amount
+                )
+                pyautogui.scroll(scroll_amount)
+            case ActionType.FINISH:
+                # Task completion - no action needed
+                pass
+            case ActionType.WAIT:
+                # Wait for a short period
+                time.sleep(self.config.wait_duration)
+            case ActionType.CALL_USER:
+                # Call user - implementation depends on requirements
+                print("User intervention requested")
+            case _:
+                print(f"Unknown action type: {action.type}")
+    def _execute_action(self, action: Action) -> None:
+        """Execute an action, potentially multiple times."""
+        count = action.count or 1
+        for _ in range(count):
+            self._execute_single_action(action)
+    def __call__(self, actions: list[Action]) -> None:
+        """Execute the provided list of actions."""
+        for action in actions:
+            try:
+                self._execute_action(action)
+            except Exception as e:
+                print(f"Error executing action {action.type}: {e}")
+                raise

oagi/handler/screenshot_maker.py ADDED Viewed

@@ -0,0 +1,41 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Optional
+from ..types import Image
+from ..types.models.image_config import ImageConfig
+from .pil_image import PILImage
+class ScreenshotMaker:
+    """Takes screenshots using pyautogui."""
+    def __init__(self, config: ImageConfig | None = None):
+        self.config = config or ImageConfig()
+        self._last_image: Optional[PILImage] = None
+    def __call__(self) -> Image:
+        """Take and process a screenshot."""
+        # Create PILImage from screenshot
+        pil_image = PILImage.from_screenshot()
+        # Apply transformation if config is set
+        if self.config:
+            pil_image = pil_image.transform(self.config)
+        # Store as the last image
+        self._last_image = pil_image
+        return pil_image
+    def last_image(self) -> Image:
+        """Return the last screenshot taken, or take a new one if none exists."""
+        if self._last_image is None:
+            return self()
+        return self._last_image

oagi/logging.py ADDED Viewed

@@ -0,0 +1,55 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import logging
+import os
+def get_logger(name: str) -> logging.Logger:
+    """
+    Get a logger with the specified name under the 'oagi' namespace.
+    Log level is controlled by OAGI_LOG environment variable.
+    Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL
+    Default: INFO
+    """
+    logger = logging.getLogger(f"oagi.{name}")
+    oagi_root = logging.getLogger("oagi")
+    # Get log level from environment
+    log_level = os.getenv("OAGI_LOG", "INFO").upper()
+    # Convert string to logging level
+    try:
+        level = getattr(logging, log_level)
+    except AttributeError:
+        level = logging.INFO
+    # Configure root oagi logger once
+    if not oagi_root.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        handler.setFormatter(formatter)
+        oagi_root.addHandler(handler)
+        # Prevent propagation to root logger to avoid duplicate logs
+        oagi_root.propagate = False
+    # Always update level in case environment variable changed
+    oagi_root.setLevel(level)
+    # Suppress verbose httpx logs unless DEBUG level is enabled
+    # httpx logs every HTTP request at INFO level by default
+    httpx_logger = logging.getLogger("httpx")
+    if level == logging.DEBUG:
+        httpx_logger.setLevel(logging.DEBUG)
+    else:
+        httpx_logger.setLevel(logging.WARNING)
+    return logger

oagi/server/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .config import ServerConfig
+from .main import create_app
+from .socketio_server import sio
+__all__ = ["create_app", "sio", "ServerConfig"]

oagi/server/agent_wrappers.py ADDED Viewed

@@ -0,0 +1,98 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import logging
+from typing import TYPE_CHECKING
+from ..types import URL
+from ..types.models.action import Action
+from .models import ScreenshotRequestData, ScreenshotResponseData
+if TYPE_CHECKING:
+    from .session_store import Session
+    from .socketio_server import SessionNamespace
+logger = logging.getLogger(__name__)
+class SocketIOActionHandler:
+    """Wraps Socket.IO connection as an AsyncActionHandler.
+    This handler emits actions through the Socket.IO connection to the client.
+    """
+    def __init__(self, namespace: "SessionNamespace", session: "Session"):
+        self.namespace = namespace
+        self.session = session
+    async def __call__(self, actions: list[Action]) -> None:
+        if not actions:
+            logger.debug("No actions to execute")
+            return
+        logger.debug(f"Executing {len(actions)} actions via Socket.IO")
+        await self.namespace._emit_actions(self.session, actions)
+class SocketIOImageProvider:
+    """Wraps Socket.IO connection as an AsyncImageProvider.
+    This provider requests screenshots from the client through Socket.IO.
+    """
+    def __init__(
+        self,
+        namespace: "SessionNamespace",
+        session: "Session",
+        oagi_client,
+    ):
+        self.namespace = namespace
+        self.session = session
+        self.oagi_client = oagi_client
+        self._last_url: str | None = None
+    async def __call__(self) -> URL:
+        logger.debug("Requesting screenshot via Socket.IO")
+        # Get S3 presigned URL from OAGI
+        upload_response = await self.oagi_client.get_s3_presigned_url()
+        # Request screenshot from client with the presigned URL
+        screenshot_data = await self.namespace.call(
+            "request_screenshot",
+            ScreenshotRequestData(
+                presigned_url=upload_response.url,
+                uuid=upload_response.uuid,
+                expires_at=str(upload_response.expires_at),  # Convert int to string
+            ).model_dump(),
+            to=self.session.socket_id,
+            timeout=self.namespace.config.socketio_timeout,
+        )
+        if not screenshot_data:
+            raise Exception("No response from screenshot request")
+        # Validate response
+        ack = ScreenshotResponseData(**screenshot_data)
+        if not ack.success:
+            raise Exception(f"Screenshot upload failed: {ack.error}")
+        # Store the URL for last_image()
+        self._last_url = upload_response.download_url
+        self.session.current_screenshot_url = upload_response.download_url
+        logger.debug(f"Screenshot captured successfully: {upload_response.uuid}")
+        return URL(upload_response.download_url)
+    async def last_image(self) -> URL:
+        if self._last_url:
+            logger.debug("Returning last captured screenshot")
+            return URL(self._last_url)
+        logger.debug("No previous screenshot, capturing new one")
+        return await self()

oagi/server/config.py ADDED Viewed

@@ -0,0 +1,46 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from pydantic import Field
+from ..exceptions import check_optional_dependency
+check_optional_dependency("pydantic_settings", "Server features", "server")
+from pydantic_settings import BaseSettings  # noqa: E402
+class ServerConfig(BaseSettings):
+    # OAGI API settings
+    oagi_api_key: str = Field(..., alias="OAGI_API_KEY")
+    oagi_base_url: str = Field(default="https://api.agiopen.org", alias="OAGI_BASE_URL")
+    # Server settings
+    server_host: str = Field(default="0.0.0.0", alias="OAGI_SERVER_HOST")
+    server_port: int = Field(default=8000, alias="OAGI_SERVER_PORT")
+    cors_allowed_origins: str = Field(default="*", alias="OAGI_CORS_ORIGINS")
+    # Session settings
+    session_timeout_seconds: float = Field(default=10.0)
+    # Model settings
+    default_model: str = Field(default="lux-actor-1", alias="OAGI_DEFAULT_MODEL")
+    default_temperature: float = Field(default=0.5, ge=0.0, le=2.0)
+    # Agent settings
+    max_steps: int = Field(default=20, alias="OAGI_MAX_STEPS", ge=1, le=100)
+    # Socket.IO settings
+    socketio_path: str = Field(default="/socket.io")
+    socketio_timeout: float = Field(default=30.0)
+    model_config = {
+        "env_file": ".env",
+        "env_file_encoding": "utf-8",
+        "populate_by_name": True,
+        "extra": "ignore",
+    }