PyPI - oagi - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

oagi: 0.3.0-py3-none-any.whl → 0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of oagi might be problematic. Click here for more details.

Files changed (18)

oagi/__init__.py +25 -3
oagi/async_client.py +239 -0
oagi/async_pyautogui_action_handler.py +44 -0
oagi/async_screenshot_maker.py +47 -0
oagi/async_short_task.py +56 -0
oagi/async_single_step.py +83 -0
oagi/async_task.py +117 -0
oagi/pyautogui_action_handler.py +86 -51
oagi/short_task.py +3 -3
oagi/types/__init__.py +4 -0
oagi/types/async_action_handler.py +30 -0
oagi/types/async_image_provider.py +37 -0
oagi/types/models/action.py +1 -0
{oagi-0.3.0.dist-info → oagi-0.4.1.dist-info}/METADATA +43 -1
oagi-0.4.1.dist-info/RECORD +30 -0
oagi-0.3.0.dist-info/RECORD +0 -22
{oagi-0.3.0.dist-info → oagi-0.4.1.dist-info}/WHEEL +0 -0
{oagi-0.3.0.dist-info → oagi-0.4.1.dist-info}/licenses/LICENSE +0 -0

oagi/__init__.py CHANGED Viewed

@@ -6,6 +6,12 @@
 #  Licensed under the MIT License.
 # -----------------------------------------------------------------------------
+from oagi.async_client import AsyncClient
+from oagi.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
+from oagi.async_screenshot_maker import AsyncScreenshotMaker
+from oagi.async_short_task import AsyncShortTask
+from oagi.async_single_step import async_single_step
+from oagi.async_task import AsyncTask
 from oagi.exceptions import (
     APIError,
     AuthenticationError,
@@ -19,26 +25,42 @@ from oagi.exceptions import (
     ValidationError,
 )
 from oagi.pil_image import PILImage
-from oagi.pyautogui_action_handler import PyautoguiActionHandler
+from oagi.pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
 from oagi.screenshot_maker import ScreenshotMaker
 from oagi.short_task import ShortTask
 from oagi.single_step import single_step
 from oagi.sync_client import ErrorDetail, ErrorResponse, LLMResponse, SyncClient
 from oagi.task import Task
-from oagi.types import ImageConfig
+from oagi.types import (
+    AsyncActionHandler,
+    AsyncImageProvider,
+    ImageConfig,
+)
 __all__ = [
-    # Core classes
+    # Core sync classes
     "Task",
     "ShortTask",
     "SyncClient",
+    # Core async classes
+    "AsyncTask",
+    "AsyncShortTask",
+    "AsyncClient",
     # Functions
     "single_step",
+    "async_single_step",
     # Image classes
     "PILImage",
     # Handler classes
     "PyautoguiActionHandler",
+    "PyautoguiConfig",
     "ScreenshotMaker",
+    # Async handler classes
+    "AsyncPyautoguiActionHandler",
+    "AsyncScreenshotMaker",
+    # Async protocols
+    "AsyncActionHandler",
+    "AsyncImageProvider",
     # Configuration
     "ImageConfig",
     # Response models

oagi/async_client.py ADDED Viewed

@@ -0,0 +1,239 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import os
+from functools import wraps
+import httpx
+from .exceptions import (
+    APIError,
+    AuthenticationError,
+    ConfigurationError,
+    NetworkError,
+    NotFoundError,
+    RateLimitError,
+    RequestTimeoutError,
+    ServerError,
+    ValidationError,
+)
+from .logging import get_logger
+from .sync_client import ErrorResponse, LLMResponse
+logger = get_logger("async_client")
+def async_log_trace_on_failure(func):
+    """Wrap a coroutine function so that failures log their request/trace IDs.
+    When the awaited call raises and the exception carries a non-None
+    ``response`` attribute, the ``x-request-id`` and ``x-trace-id`` headers of
+    that response are logged at error level. The exception is always re-raised
+    unchanged.
+    """
+    @wraps(func)
+    async def traced(*call_args, **call_kwargs):
+        try:
+            outcome = await func(*call_args, **call_kwargs)
+        except Exception as exc:
+            failed_response = getattr(exc, "response", None)
+            # Exceptions without an attached response have no IDs to report
+            if failed_response is None:
+                raise
+            logger.error(
+                f"Request Id: {failed_response.headers.get('x-request-id', '')}"
+            )
+            logger.error(f"Trace Id: {failed_response.headers.get('x-trace-id', '')}")
+            raise
+        return outcome
+    return traced
+class AsyncClient:
+    """Async HTTP client for the OAGI API.
+    Owns an ``httpx.AsyncClient`` bound to the configured base URL. Use the
+    instance as an async context manager, or call ``close()`` explicitly, so the
+    underlying HTTP client is closed.
+    """
+    def __init__(self, base_url: str | None = None, api_key: str | None = None):
+        """Resolve configuration and create the underlying HTTP client.
+        Args:
+            base_url: API base URL; falls back to the OAGI_BASE_URL environment variable
+            api_key: API key; falls back to the OAGI_API_KEY environment variable
+        Raises:
+            ConfigurationError: If either value is missing from both sources
+        """
+        # Get from environment if not provided
+        self.base_url = base_url or os.getenv("OAGI_BASE_URL")
+        self.api_key = api_key or os.getenv("OAGI_API_KEY")
+        # Validate required configuration
+        if not self.base_url:
+            raise ConfigurationError(
+                "OAGI base URL must be provided either as 'base_url' parameter or "
+                "OAGI_BASE_URL environment variable"
+            )
+        if not self.api_key:
+            raise ConfigurationError(
+                "OAGI API key must be provided either as 'api_key' parameter or "
+                "OAGI_API_KEY environment variable"
+            )
+        # Drop trailing slashes so request paths such as "/v1/message" join cleanly
+        self.base_url = self.base_url.rstrip("/")
+        self.client = httpx.AsyncClient(base_url=self.base_url)
+        # Timeout (seconds) passed to the /v1/message request
+        self.timeout = 60
+        logger.info(f"AsyncClient initialized with base_url: {self.base_url}")
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        # Single shutdown path: leaving the context is the same as calling close()
+        await self.close()
+    async def close(self):
+        """Close the underlying httpx async client"""
+        await self.client.aclose()
+    @async_log_trace_on_failure
+    async def create_message(
+        self,
+        model: str,
+        screenshot: str,  # base64 encoded
+        task_description: str | None = None,
+        task_id: str | None = None,
+        instruction: str | None = None,
+        max_actions: int | None = 5,
+        api_version: str | None = None,
+    ) -> LLMResponse:
+        """
+        Call the /v1/message endpoint to analyze task and screenshot
+        Args:
+            model: The model to use for task analysis
+            screenshot: Base64-encoded screenshot image
+            task_description: Description of the task (required for new sessions)
+            task_id: Task ID for continuing existing task
+            instruction: Additional instruction when continuing a session (only works with task_id)
+            max_actions: Maximum number of actions to return (1-20)
+            api_version: API version header
+        Returns:
+            LLMResponse: The response from the API
+        Raises:
+            RequestTimeoutError: If the request times out
+            NetworkError: If a network-level error occurs
+            APIError: If the body is not a JSON object, the status is not 200, or
+                a 200 response carries an error. Non-200 statuses map to
+                AuthenticationError (401), NotFoundError (404), ValidationError
+                (422), RateLimitError (429) or ServerError (>= 500) where applicable
+        """
+        headers = {}
+        if api_version:
+            headers["x-api-version"] = api_version
+        if self.api_key:
+            headers["x-api-key"] = self.api_key
+        # Only send optional fields that were actually supplied
+        payload = {"model": model, "screenshot": screenshot}
+        if task_description is not None:
+            payload["task_description"] = task_description
+        if task_id is not None:
+            payload["task_id"] = task_id
+        if instruction is not None:
+            payload["instruction"] = instruction
+        if max_actions is not None:
+            payload["max_actions"] = max_actions
+        logger.info(f"Making async API request to /v1/message with model: {model}")
+        logger.debug(
+            f"Request includes task_description: {task_description is not None}, task_id: {task_id is not None}"
+        )
+        try:
+            response = await self.client.post(
+                "/v1/message", json=payload, headers=headers, timeout=self.timeout
+            )
+        except httpx.TimeoutException as e:
+            logger.error(f"Request timed out after {self.timeout} seconds")
+            # Chain explicitly so the httpx error is recorded as the cause
+            raise RequestTimeoutError(
+                f"Request timed out after {self.timeout} seconds", e
+            ) from e
+        except httpx.NetworkError as e:
+            logger.error(f"Network error: {e}")
+            raise NetworkError(f"Network error: {e}", e) from e
+        try:
+            response_data = response.json()
+        except ValueError as e:
+            # If response is not JSON, raise API error
+            logger.error(f"Non-JSON API response: {response.status_code}")
+            raise APIError(
+                f"Invalid response format (status {response.status_code})",
+                status_code=response.status_code,
+                response=response,
+            ) from e
+        # The models below are built with ``**response_data``; a JSON array or
+        # scalar would otherwise surface as a bare TypeError instead of an APIError
+        if not isinstance(response_data, dict):
+            logger.error(f"Non-object JSON API response: {response.status_code}")
+            raise APIError(
+                f"Invalid response format (status {response.status_code})",
+                status_code=response.status_code,
+                response=response,
+            )
+        # Check if it's an error response (non-200 status or has error field)
+        if response.status_code != 200:
+            error_resp = ErrorResponse(**response_data)
+            # Map to specific exception types based on status code
+            exception_class = self._get_exception_class(response.status_code)
+            if error_resp.error:
+                error_code = error_resp.error.code
+                error_msg = error_resp.error.message
+                logger.error(f"API Error [{error_code}]: {error_msg}")
+                raise exception_class(
+                    error_msg,
+                    code=error_code,
+                    status_code=response.status_code,
+                    response=response,
+                )
+            # Error response without error details
+            logger.error(f"API error response without details: {response.status_code}")
+            raise exception_class(
+                f"API error (status {response.status_code})",
+                status_code=response.status_code,
+                response=response,
+            )
+        # Parse successful response
+        result = LLMResponse(**response_data)
+        # Check if the response contains an error (even with 200 status)
+        if result.error:
+            logger.error(
+                f"API Error in response: [{result.error.code}]: {result.error.message}"
+            )
+            raise APIError(
+                result.error.message,
+                code=result.error.code,
+                status_code=200,
+                response=response,
+            )
+        logger.info(
+            f"Async API request successful - task_id: {result.task_id}, step: {result.current_step}, complete: {result.is_complete}"
+        )
+        logger.debug(f"Response included {len(result.actions)} actions")
+        return result
+    def _get_exception_class(self, status_code: int) -> type[APIError]:
+        """Get the appropriate exception class based on status code."""
+        # Any 5xx (or higher) status is reported as a server-side failure
+        if status_code >= 500:
+            return ServerError
+        status_map = {
+            401: AuthenticationError,
+            404: NotFoundError,
+            422: ValidationError,
+            429: RateLimitError,
+        }
+        return status_map.get(status_code, APIError)
+    async def health_check(self) -> dict:
+        """
+        Call the /health endpoint for health check
+        Returns:
+            dict: Health check response
+        Raises:
+            httpx.HTTPStatusError: If the endpoint answers with an error status
+        """
+        logger.debug("Making async health check request")
+        try:
+            response = await self.client.get("/health")
+            response.raise_for_status()
+            result = response.json()
+            logger.debug("Async health check successful")
+            return result
+        except httpx.HTTPStatusError as e:
+            logger.warning(f"Async health check failed: {e}")
+            raise

oagi/async_pyautogui_action_handler.py ADDED Viewed

@@ -0,0 +1,44 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import asyncio
+from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
+from .types import Action
+class AsyncPyautoguiActionHandler:
+    """
+    Async wrapper for PyautoguiActionHandler that runs actions in a thread pool.
+    This allows PyAutoGUI operations to be non-blocking in async contexts,
+    enabling concurrent execution of other async tasks while GUI actions are performed.
+    """
+    def __init__(self, config: PyautoguiConfig | None = None):
+        """Initialize with optional configuration.
+        Args:
+            config: PyautoguiConfig instance for customizing behavior
+        """
+        self.sync_handler = PyautoguiActionHandler(config=config)
+        # Expose the exact config object the wrapped handler resolved, so the two
+        # cannot drift apart when no config was passed in
+        self.config = self.sync_handler.config
+    async def __call__(self, actions: list[Action]) -> None:
+        """
+        Execute actions asynchronously using a thread pool executor.
+        This prevents PyAutoGUI operations from blocking the async event loop,
+        allowing other coroutines to run while GUI actions are being performed.
+        Args:
+            actions: List of actions to execute
+        """
+        # A coroutine always has a running loop; get_running_loop() returns it
+        # directly, whereas get_event_loop() is the legacy policy-based lookup
+        loop = asyncio.get_running_loop()
+        # Run the synchronous handler in the default executor to avoid blocking
+        await loop.run_in_executor(None, self.sync_handler, actions)

oagi/async_screenshot_maker.py ADDED Viewed

@@ -0,0 +1,47 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import asyncio
+from .screenshot_maker import ScreenshotMaker
+from .types import Image, ImageConfig
+class AsyncScreenshotMaker:
+    """
+    Async wrapper for ScreenshotMaker that captures screenshots in a thread pool.
+    This allows screenshot capture to be non-blocking in async contexts,
+    enabling concurrent execution of other async tasks while screenshots are taken.
+    """
+    def __init__(self, config: ImageConfig | None = None):
+        """Initialize with optional image configuration.
+        Args:
+            config: ImageConfig instance for customizing screenshot format and quality
+        """
+        self.sync_screenshot_maker = ScreenshotMaker(config=config)
+        self.config = config
+    async def __call__(self) -> Image:
+        """
+        Capture a screenshot asynchronously using a thread pool executor.
+        This prevents screenshot capture from blocking the async event loop,
+        allowing other coroutines to run while the screenshot is being taken.
+        Returns:
+            Image: The image returned by the wrapped ScreenshotMaker
+        """
+        # A coroutine always has a running loop; get_running_loop() returns it
+        # directly, whereas get_event_loop() is the legacy policy-based lookup
+        loop = asyncio.get_running_loop()
+        # Run the synchronous screenshot capture in the default executor
+        return await loop.run_in_executor(None, self.sync_screenshot_maker)
+    async def last_image(self) -> Image:
+        """Return the wrapped maker's ``last_image()`` result (called inline, not in the executor)."""
+        return self.sync_screenshot_maker.last_image()

oagi/async_short_task.py ADDED Viewed

@@ -0,0 +1,56 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .async_task import AsyncTask
+from .logging import get_logger
+from .types import AsyncActionHandler, AsyncImageProvider
+logger = get_logger("async_short_task")
+class AsyncShortTask(AsyncTask):
+    """Async task implementation with automatic mode for short-duration tasks."""
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model: str = "vision-model-v1",
+    ):
+        super().__init__(api_key=api_key, base_url=base_url, model=model)
+    async def auto_mode(
+        self,
+        task_desc: str,
+        max_steps: int = 5,
+        executor: AsyncActionHandler | None = None,
+        image_provider: AsyncImageProvider | None = None,
+    ) -> bool:
+        """Run the task in automatic mode with the provided executor and image provider.
+        Args:
+            task_desc: Description of the task to run
+            max_steps: Maximum number of capture/step iterations
+            executor: Async callable that performs each step's actions; when
+                omitted, steps are requested but no actions are executed
+            image_provider: Async callable that returns the image for each step (required)
+        Returns:
+            bool: True if a step reported completion within max_steps, else False
+        Raises:
+            ValueError: If image_provider is not supplied
+        """
+        # Fail before touching the API: every iteration awaits image_provider()
+        if image_provider is None:
+            raise ValueError("image_provider is required for auto_mode")
+        logger.info(
+            f"Starting async auto mode for task: '{task_desc}' (max_steps: {max_steps})"
+        )
+        await self.init_task(task_desc, max_steps=max_steps)
+        for i in range(max_steps):
+            logger.debug(f"Async auto mode step {i + 1}/{max_steps}")
+            image = await image_provider()
+            step = await self.step(image)
+            # Actions are executed before the stop check, so the actions of the
+            # final step are still performed
+            if executor:
+                logger.debug(f"Executing {len(step.actions)} actions asynchronously")
+                await executor(step.actions)
+            if step.stop:
+                logger.info(
+                    f"Async auto mode completed successfully after {i + 1} steps"
+                )
+                return True
+        logger.warning(
+            f"Async auto mode reached max steps ({max_steps}) without completion"
+        )
+        return False

oagi/async_single_step.py ADDED Viewed

@@ -0,0 +1,83 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from pathlib import Path
+from .async_task import AsyncTask
+from .pil_image import PILImage
+from .types import Image, Step
+async def async_single_step(
+    task_description: str,
+    screenshot: str | bytes | Path | Image,
+    instruction: str | None = None,
+    api_key: str | None = None,
+    base_url: str | None = None,
+) -> Step:
+    """
+    Run one inference step asynchronously with a throwaway task.
+    A temporary AsyncTask is created, initialized with the task description,
+    stepped once with the screenshot, and closed again, so no task state is
+    kept between calls.
+    Args:
+        task_description: Description of the task to perform
+        screenshot: An Image, raw bytes, or a file path (str or Path); a str is
+            always treated as a path
+        instruction: Optional additional instruction passed to the step
+        api_key: OAGI API key (uses environment variable if not provided)
+        base_url: OAGI base URL (uses environment variable if not provided)
+    Returns:
+        Step: The result returned by the task's single step
+    Example:
+        >>> import asyncio
+        >>> async def main():
+        ...     # From raw bytes
+        ...     with open("screenshot.png", "rb") as f:
+        ...         step = await async_single_step("Click the submit button", f.read())
+        ...     print(f"Actions: {step.actions}")
+        ...     # From a file path
+        ...     step = await async_single_step("Find the search box", "screenshot.png")
+        ...     # From a PILImage
+        ...     image = PILImage.from_file("screenshot.png")
+        ...     step = await async_single_step("Click next page", image)
+        >>> asyncio.run(main())
+    """
+    # Normalize path-like and bytes inputs to a PILImage; anything else is passed through
+    image = screenshot
+    if isinstance(image, (str, Path)):
+        image = PILImage.from_file(str(image))
+    elif isinstance(image, bytes):
+        image = PILImage.from_bytes(image)
+    # The context manager closes the temporary task on exit, success or failure
+    async with AsyncTask(api_key=api_key, base_url=base_url) as task:
+        await task.init_task(task_description)
+        return await task.step(image, instruction=instruction)

oagi/async_task.py ADDED Viewed

@@ -0,0 +1,117 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .async_client import AsyncClient
+from .logging import get_logger
+from .sync_client import encode_screenshot_from_bytes
+from .types import Image, Step
+logger = get_logger("async_task")
+class AsyncTask:
+    """Async base class that drives a task through the OAGI API, one step at a time."""
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model: str = "vision-model-v1",
+    ):
+        client = AsyncClient(base_url=base_url, api_key=api_key)
+        self.client = client
+        # Mirror the values the client resolved (arguments or environment)
+        self.api_key = client.api_key
+        self.base_url = client.base_url
+        self.task_id: str | None = None
+        self.task_description: str | None = None
+        self.model = model
+    async def init_task(self, task_desc: str, max_steps: int = 5):
+        """Start a new task for ``task_desc`` and remember the task ID the API returns.
+        ``max_steps`` is only included in the log message here; it is not sent to the API.
+        """
+        self.task_description = task_desc
+        # The initial request carries an empty screenshot and no task_id
+        created = await self.client.create_message(
+            model=self.model,
+            screenshot="",
+            task_description=task_desc,
+            task_id=None,
+        )
+        self.task_id = created.task_id
+        logger.info(f"Async task initialized: '{task_desc}' (max_steps: {max_steps})")
+    def _adopt_task_id(self, reported_id: str | None) -> None:
+        """Store the task ID reported by the API, logging when it differs from the current one."""
+        if self.task_id == reported_id:
+            return
+        if self.task_id is None:
+            logger.debug(f"Task ID assigned: {reported_id}")
+        else:
+            logger.debug(f"Task ID changed: {self.task_id} -> {reported_id}")
+        self.task_id = reported_id
+    async def step(
+        self, screenshot: Image | bytes, instruction: str | None = None
+    ) -> Step:
+        """Send a screenshot to the API and return the next actions.
+        Args:
+            screenshot: Screenshot as Image object or raw bytes
+            instruction: Optional additional instruction forwarded with this step
+        Returns:
+            Step: Reason, actions and stop flag taken from the API response
+        Raises:
+            ValueError: If no task description has been set via init_task()
+        """
+        if not self.task_description:
+            raise ValueError("Task description must be set. Call init_task() first.")
+        logger.debug(f"Executing async step for task: '{self.task_description}'")
+        try:
+            # Image objects expose their bytes through read(); raw bytes pass through
+            raw = screenshot.read() if isinstance(screenshot, Image) else screenshot
+            encoded = encode_screenshot_from_bytes(raw)
+            response = await self.client.create_message(
+                model=self.model,
+                screenshot=encoded,
+                task_description=self.task_description,
+                task_id=self.task_id,
+                instruction=instruction,
+            )
+            self._adopt_task_id(response.task_id)
+            outcome = Step(
+                reason=response.reason,
+                actions=response.actions,
+                stop=response.is_complete,
+            )
+            if not response.is_complete:
+                logger.debug(
+                    f"Async step {response.current_step} completed with {len(response.actions)} actions"
+                )
+            else:
+                logger.info(f"Async task completed after {response.current_step} steps")
+            return outcome
+        except Exception as e:
+            # Log at this level, then let the caller see the original exception
+            logger.error(f"Error during async step execution: {e}")
+            raise
+    async def close(self):
+        """Close the underlying client so its HTTP resources are released."""
+        await self.client.close()
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()

oagi/pyautogui_action_handler.py CHANGED Viewed

@@ -10,10 +10,28 @@ import re
 import time
 import pyautogui
+from pydantic import BaseModel, Field
 from .types import Action, ActionType
+class PyautoguiConfig(BaseModel):
+    """Configuration for PyautoguiActionHandler.
+    Each field replaces a value that used to be hard-coded in the handler.
+    """
+    drag_duration: float = Field(
+        default=0.5, description="Duration for drag operations in seconds"
+    )
+    # The handler passes this value to pyautogui.scroll() for "up" and its
+    # negation for any other direction, so it is a magnitude, not a signed amount
+    scroll_amount: int = Field(
+        default=30,
+        description=(
+            "Amount to scroll per scroll action "
+            "(applied as-is for up, negated for down)"
+        ),
+    )
+    wait_duration: float = Field(
+        default=1.0, description="Duration for wait actions in seconds"
+    )
+    action_pause: float = Field(
+        default=0.1, description="Pause between PyAutoGUI actions in seconds"
+    )
 class PyautoguiActionHandler:
     """
     Handles actions to be executed using PyAutoGUI.
@@ -29,11 +47,13 @@ class PyautoguiActionHandler:
         actions (list[Action]): List of actions to be processed and executed.
     """
-    def __init__(self):
+    def __init__(self, config: PyautoguiConfig | None = None):
+        # Use default config if none provided
+        self.config = config or PyautoguiConfig()
         # Get screen dimensions for coordinate denormalization
         self.screen_width, self.screen_height = pyautogui.size()
         # Set default delay between actions
-        pyautogui.PAUSE = 0.1
+        pyautogui.PAUSE = self.config.action_pause
     def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
         """Convert coordinates from 0-1000 range to actual screen coordinates."""
@@ -82,59 +102,74 @@ class PyautoguiActionHandler:
         keys = [key.strip() for key in args_str.split("+")]
         return keys
+    def _execute_single_action(self, action: Action) -> None:
+        """Perform one action exactly once via PyAutoGUI.
+        The action's argument string is parsed according to its type; durations
+        and the scroll amount come from ``self.config``. Unknown action types
+        are reported with a printed message rather than an exception.
+        """
+        # Remove outer parentheses if present
+        raw_arg = action.argument.strip("()")
+        kind = action.type
+        if kind == ActionType.CLICK:
+            px, py = self._parse_coords(raw_arg)
+            pyautogui.click(px, py)
+        elif kind == ActionType.LEFT_DOUBLE:
+            px, py = self._parse_coords(raw_arg)
+            pyautogui.doubleClick(px, py)
+        elif kind == ActionType.LEFT_TRIPLE:
+            px, py = self._parse_coords(raw_arg)
+            pyautogui.tripleClick(px, py)
+        elif kind == ActionType.RIGHT_SINGLE:
+            px, py = self._parse_coords(raw_arg)
+            pyautogui.rightClick(px, py)
+        elif kind == ActionType.DRAG:
+            start_x, start_y, end_x, end_y = self._parse_drag_coords(raw_arg)
+            # Position the pointer at the start point, then drag with the left button
+            pyautogui.moveTo(start_x, start_y)
+            pyautogui.dragTo(
+                end_x, end_y, duration=self.config.drag_duration, button="left"
+            )
+        elif kind == ActionType.HOTKEY:
+            pyautogui.hotkey(*self._parse_hotkey(raw_arg))
+        elif kind == ActionType.TYPE:
+            # Remove surrounding quotes if present
+            pyautogui.typewrite(raw_arg.strip("\"'"))
+        elif kind == ActionType.SCROLL:
+            px, py, direction = self._parse_scroll(raw_arg)
+            pyautogui.moveTo(px, py)
+            # Configured amount for "up"; negated for any other direction
+            amount = self.config.scroll_amount
+            if direction != "up":
+                amount = -amount
+            pyautogui.scroll(amount)
+        elif kind == ActionType.FINISH:
+            # Task completion - no action needed
+            pass
+        elif kind == ActionType.WAIT:
+            # Pause for the configured wait duration
+            time.sleep(self.config.wait_duration)
+        elif kind == ActionType.CALL_USER:
+            # Call user - implementation depends on requirements
+            print("User intervention requested")
+        else:
+            print(f"Unknown action type: {action.type}")
     def _execute_action(self, action: Action) -> None:
-        """Execute a single action."""
+        """Execute an action, potentially multiple times."""
         count = action.count or 1
-        arg = action.argument.strip("()")  # Remove outer parentheses if present
         for _ in range(count):
-            match action.type:
-                case ActionType.CLICK:
-                    x, y = self._parse_coords(arg)
-                    pyautogui.click(x, y)
-                case ActionType.LEFT_DOUBLE:
-                    x, y = self._parse_coords(arg)
-                    pyautogui.doubleClick(x, y)
-                case ActionType.RIGHT_SINGLE:
-                    x, y = self._parse_coords(arg)
-                    pyautogui.rightClick(x, y)
-                case ActionType.DRAG:
-                    x1, y1, x2, y2 = self._parse_drag_coords(arg)
-                    pyautogui.moveTo(x1, y1)
-                    pyautogui.dragTo(x2, y2, duration=0.5, button="left")
-                case ActionType.HOTKEY:
-                    keys = self._parse_hotkey(arg)
-                    pyautogui.hotkey(*keys)
-                case ActionType.TYPE:
-                    # Remove quotes if present
-                    text = arg.strip("\"'")
-                    pyautogui.typewrite(text)
-                case ActionType.SCROLL:
-                    x, y, direction = self._parse_scroll(arg)
-                    pyautogui.moveTo(x, y)
-                    scroll_amount = 5 if direction == "up" else -5
-                    pyautogui.scroll(scroll_amount)
-                case ActionType.FINISH:
-                    # Task completion - no action needed
-                    pass
-                case ActionType.WAIT:
-                    # Wait for a short period
-                    time.sleep(1)
-                case ActionType.CALL_USER:
-                    # Call user - implementation depends on requirements
-                    print("User intervention requested")
-                case _:
-                    print(f"Unknown action type: {action.type}")
+            self._execute_single_action(action)
     def __call__(self, actions: list[Action]) -> None:
         """Execute the provided list of actions."""

oagi/short_task.py CHANGED Viewed

@@ -41,12 +41,12 @@ class ShortTask(Task):
             logger.debug(f"Auto mode step {i + 1}/{max_steps}")
             image = image_provider()
             step = self.step(image)
-            if step.stop:
-                logger.info(f"Auto mode completed successfully after {i + 1} steps")
-                return True
             if executor:
                 logger.debug(f"Executing {len(step.actions)} actions")
                 executor(step.actions)
+            if step.stop:
+                logger.info(f"Auto mode completed successfully after {i + 1} steps")
+                return True
         logger.warning(f"Auto mode reached max steps ({max_steps}) without completion")
         return False

oagi/types/__init__.py CHANGED Viewed

@@ -7,6 +7,8 @@
 # -----------------------------------------------------------------------------
 from .action_handler import ActionHandler
+from .async_action_handler import AsyncActionHandler
+from .async_image_provider import AsyncImageProvider
 from .image import Image
 from .image_provider import ImageProvider
 from .models import Action, ActionType, ImageConfig, Step
@@ -18,5 +20,7 @@ __all__ = [
     "ImageConfig",
     "Step",
     "ActionHandler",
+    "AsyncActionHandler",
     "ImageProvider",
+    "AsyncImageProvider",
 ]

oagi/types/async_action_handler.py ADDED Viewed

@@ -0,0 +1,30 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol
+from .models import Action
+class AsyncActionHandler(Protocol):
+    async def __call__(self, actions: list[Action]) -> None:
+        """
+        Asynchronously executes a list of actions.
+        This method takes a list of `Action` objects and executes them asynchronously.
+        It is used to perform operations represented by the `Action` instances. This
+        method does not return any value and modifies the system based on the input actions.
+        Parameters:
+            actions (list[Action]): A list of `Action` objects to be executed. Each
+            `Action` must encapsulate the logic that is intended to be applied
+            during the call.
+        Raises:
+            RuntimeError: If an error occurs during the execution of the actions.
+        """

oagi/types/async_image_provider.py ADDED Viewed

@@ -0,0 +1,37 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol
+from .image import Image
+class AsyncImageProvider(Protocol):
+    async def __call__(self) -> Image:
+        """
+        Asynchronously provides an image.
+        This method is responsible for asynchronously capturing, generating, or retrieving
+        an image that can be used for task execution or analysis. The method should return
+        an object that implements the Image protocol.
+        Returns:
+            Image: An object implementing the Image protocol that represents
+                  the captured or generated image.
+        Raises:
+            RuntimeError: If an error occurs during image capture or generation.
+        """
+    async def last_image(self) -> Image:
+        """
+        Asynchronously returns the last captured image.
+        Returns:
+            Image: The last captured image.
+        """

oagi/types/models/action.py CHANGED Viewed

@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
 class ActionType(str, Enum):
     CLICK = "click"
     LEFT_DOUBLE = "left_double"
+    LEFT_TRIPLE = "left_triple"
     RIGHT_SINGLE = "right_single"
     DRAG = "drag"
     HOTKEY = "hotkey"

{oagi-0.3.0.dist-info → oagi-0.4.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: oagi
-Version: 0.3.0
+Version: 0.4.1
 Summary: Official API of OpenAGI Foundation
 Project-URL: Homepage, https://github.com/agiopen-org/oagi
 Author-email: OpenAGI Foundation <contact@agiopen.org>
@@ -82,6 +82,23 @@ completed = task.auto_mode(
 )
 ```
+Configure PyAutoGUI behavior with custom settings:
+```python
+from oagi import PyautoguiActionHandler, PyautoguiConfig
+# Customize action behavior
+config = PyautoguiConfig(
+    drag_duration=1.0,      # Slower drags for precision (default: 0.5)
+    scroll_amount=50,       # Larger scroll steps (default: 30)
+    wait_duration=2.0,      # Longer waits (default: 1.0)
+    action_pause=0.2,       # More pause between actions (default: 0.1)
+)
+executor = PyautoguiActionHandler(config=config)
+task.auto_mode("Complete form", executor=executor, image_provider=ScreenshotMaker())
+```
 ### Image Processing
 Process and optimize images before sending to API:
@@ -103,6 +120,31 @@ compressed = image.transform(config)
 step = single_step("Click button", screenshot=compressed)
 ```
+### Async Support
+Use async client for non-blocking operations and better concurrency:
+```python
+import asyncio
+from oagi import async_single_step, AsyncShortTask
+async def main():
+    # Single-step async analysis
+    step = await async_single_step(
+        "Find the search bar",
+        screenshot="screenshot.png"
+    )
+    print(f"Found {len(step.actions)} actions")
+    # Async task automation
+    task = AsyncShortTask()
+    async with task:
+        await task.init_task("Complete the form")
+        # ... continue with async operations
+asyncio.run(main())
+```
 ## Examples
 See the [`examples/`](examples/) directory for more usage patterns:

oagi-0.4.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,30 @@
+oagi/__init__.py,sha256=m-Z121YCIwQOPXpTC8kd_UIJizcX8QuHyrSSguQ0KE0,2187
+oagi/async_client.py,sha256=oDj4kIdtaV37uopoAeClCFQTxrYRwHV2HwMAcMdVYwE,8455
+oagi/async_pyautogui_action_handler.py,sha256=F-lKyePCONWI03WnSxpX_QwxONbvnfdQu51wTod6mdw,1614
+oagi/async_screenshot_maker.py,sha256=pI-dbLcYOzcO1ffgTmozAdbYJQNBPKA7hmqj1RxEmIY,1688
+oagi/async_short_task.py,sha256=jvFTbmXTxFlkpAwmWeZlxbTSv_RB7V561hxw6gUcigw,1961
+oagi/async_single_step.py,sha256=QawXO4GyfMz6O9jV8QBC1vKxFuS9vjKQxxJ1nwgHBzI,2838
+oagi/async_task.py,sha256=bclqtgg7mI2WAp-62jOz044tVk4wruycpn9NYDncnA8,4145
+oagi/exceptions.py,sha256=VMwVS8ouE9nHhBpN3AZMYt5_U2kGcihWaTnBhoQLquo,1662
+oagi/logging.py,sha256=CWe89mA5MKTipIvfrqSYkv2CAFNBSwHMDQMDkG_g64g,1350
+oagi/pil_image.py,sha256=Zp7YNwyE_AT25ZEFsWKbzMxbO8JOQsJ1Espph5ye8k8,3804
+oagi/pyautogui_action_handler.py,sha256=ix_Zl9uHch3Oz1H6bNEb7-1ee3-qiW_MsT-4SbWBf7g,6610
+oagi/screenshot_maker.py,sha256=sVuW7jn-K4FmLhmYI-akdNI-UVcTeBzh9P1_qJhoq1s,1282
+oagi/short_task.py,sha256=9l1PDX70vDUEX2CIJ66yaAtb96P3mK_m95JffspnYFI,1779
+oagi/single_step.py,sha256=djhGOHzA5Y3-9_ity9QiJr_ObZZ04blSmNZsLXXXfkg,2939
+oagi/sync_client.py,sha256=E6EgFIe-H91rdsPhF1puwrBTpOnKaL6JA1WHR4R-CLY,9395
+oagi/task.py,sha256=JfsugIhBrwDmi1xOEVQdqmXsGFK-H4p17-B4rM8kbWs,4001
+oagi/types/__init__.py,sha256=YXxL-30f92qAf9U6LZuVCtKFG-Pi3xahKedaNxyrxFE,766
+oagi/types/action_handler.py,sha256=NH8E-m5qpGqWcXzTSWfF7W0Xdp8SkzJsbhCmQ0B96cg,1075
+oagi/types/async_action_handler.py,sha256=k1AaqSkFcXlxwW8sn-w0WFHGsIqHFLbcOPrkknmSVug,1116
+oagi/types/async_image_provider.py,sha256=wnhRyPtTmuALt45Qore74-RCkP5yxU9sZGjvOzFqzOk,1170
+oagi/types/image.py,sha256=KgPCCTJ6D5vHIaGZdbTE7eQEa1WlT6G9tf59ZuUCV2U,537
+oagi/types/image_provider.py,sha256=oYFdOYznrK_VOR9egzOjw5wFM5w8EY2sY01pH0ANAgU,1112
+oagi/types/models/__init__.py,sha256=bVzzGxb6lVxAQyJpy0Z1QknSe-xC3g4OIDr7t-p_3Ys,467
+oagi/types/models/action.py,sha256=hh6mRRSSWgrW4jpZo71zGMCOcZpV5_COu4148uG6G48,967
+oagi/types/models/image_config.py,sha256=tl6abVg_-IAPLwpaWprgknXu7wRWriMg-AEVyUX73v0,1567
+oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
+oagi-0.4.1.dist-info/METADATA,sha256=AGVosMgpoFwLAB9BWAknn7a3aXwVAiUAWvTQJm0w3RY,4620
+oagi-0.4.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+oagi-0.4.1.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
+oagi-0.4.1.dist-info/RECORD,,

oagi-0.3.0.dist-info/RECORD DELETED Viewed

@@ -1,22 +0,0 @@
-oagi/__init__.py,sha256=1pewp0wOcGI8urjOOCskwiJC9VghhGCRpsslf-VUiLI,1493
-oagi/exceptions.py,sha256=VMwVS8ouE9nHhBpN3AZMYt5_U2kGcihWaTnBhoQLquo,1662
-oagi/logging.py,sha256=CWe89mA5MKTipIvfrqSYkv2CAFNBSwHMDQMDkG_g64g,1350
-oagi/pil_image.py,sha256=Zp7YNwyE_AT25ZEFsWKbzMxbO8JOQsJ1Espph5ye8k8,3804
-oagi/pyautogui_action_handler.py,sha256=LBWmtqkXzZSJo07s3uOw-NWUE9rZZtbNAx0YI83pCbk,5482
-oagi/screenshot_maker.py,sha256=sVuW7jn-K4FmLhmYI-akdNI-UVcTeBzh9P1_qJhoq1s,1282
-oagi/short_task.py,sha256=fJcirqD7X3_GyINTGdOoe6wi-VFHfP-C8m-zxCvgY5M,1779
-oagi/single_step.py,sha256=djhGOHzA5Y3-9_ity9QiJr_ObZZ04blSmNZsLXXXfkg,2939
-oagi/sync_client.py,sha256=E6EgFIe-H91rdsPhF1puwrBTpOnKaL6JA1WHR4R-CLY,9395
-oagi/task.py,sha256=JfsugIhBrwDmi1xOEVQdqmXsGFK-H4p17-B4rM8kbWs,4001
-oagi/types/__init__.py,sha256=dj_UWdpRzhuVyi-pegQAv2V0f1DxidFxjWUhpcWzYKE,608
-oagi/types/action_handler.py,sha256=NH8E-m5qpGqWcXzTSWfF7W0Xdp8SkzJsbhCmQ0B96cg,1075
-oagi/types/image.py,sha256=KgPCCTJ6D5vHIaGZdbTE7eQEa1WlT6G9tf59ZuUCV2U,537
-oagi/types/image_provider.py,sha256=oYFdOYznrK_VOR9egzOjw5wFM5w8EY2sY01pH0ANAgU,1112
-oagi/types/models/__init__.py,sha256=bVzzGxb6lVxAQyJpy0Z1QknSe-xC3g4OIDr7t-p_3Ys,467
-oagi/types/models/action.py,sha256=8Xd3IcH32ENq7uXczo-mbQ736yUOGxO_TaZTfHVRY7w,935
-oagi/types/models/image_config.py,sha256=tl6abVg_-IAPLwpaWprgknXu7wRWriMg-AEVyUX73v0,1567
-oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
-oagi-0.3.0.dist-info/METADATA,sha256=BtkLuhcIXhL43C23nZa6uZNcUuhlhXjJ67OaaXxeEmI,3461
-oagi-0.3.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-oagi-0.3.0.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
-oagi-0.3.0.dist-info/RECORD,,

{oagi-0.3.0.dist-info → oagi-0.4.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{oagi-0.3.0.dist-info → oagi-0.4.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

oagi 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

Potentially problematic release.

oagi 0.3.0-py3-none-any.whl → 0.4.1-py3-none-any.whl