PyPI - oagi - Versions diffs - 0.0.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

oagi 0.0.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of oagi might be problematic. Click here for more details.

Files changed (23) hide show

oagi/__init__.py +53 -1
oagi/exceptions.py +75 -0
oagi/logging.py +45 -0
oagi/pyautogui_action_handler.py +146 -0
oagi/screenshot_maker.py +73 -0
oagi/short_task.py +44 -0
oagi/single_step.py +82 -0
oagi/sync_client.py +265 -0
oagi/task.py +109 -0
oagi/types/__init__.py +14 -0
oagi/types/action_handler.py +30 -0
oagi/types/image.py +17 -0
oagi/types/image_provider.py +34 -0
oagi/types/models/__init__.py +12 -0
oagi/types/models/action.py +32 -0
oagi/types/models/step.py +17 -0
{oagi-0.0.0.dist-info → oagi-0.2.0.dist-info}/METADATA +55 -31
oagi-0.2.0.dist-info/RECORD +20 -0
{oagi-0.0.0.dist-info → oagi-0.2.0.dist-info}/WHEEL +1 -2
{oagi-0.0.0.dist-info → oagi-0.2.0.dist-info}/licenses/LICENSE +21 -21
oagi/core.py +0 -2
oagi-0.0.0.dist-info/RECORD +0 -7
oagi-0.0.0.dist-info/top_level.txt +0 -1

oagi/__init__.py CHANGED Viewed

@@ -1 +1,53 @@
-from .core import hello
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from oagi.exceptions import (
+    APIError,
+    AuthenticationError,
+    ConfigurationError,
+    NetworkError,
+    NotFoundError,
+    OAGIError,
+    RateLimitError,
+    RequestTimeoutError,
+    ServerError,
+    ValidationError,
+)
+from oagi.pyautogui_action_handler import PyautoguiActionHandler
+from oagi.screenshot_maker import ScreenshotMaker
+from oagi.short_task import ShortTask
+from oagi.single_step import single_step
+from oagi.sync_client import ErrorDetail, ErrorResponse, LLMResponse, SyncClient
+from oagi.task import Task
+# Public API of the package: every name imported above is re-exported here,
+# grouped by role.
+__all__ = [
+    # Core classes
+    "Task",
+    "ShortTask",
+    "SyncClient",
+    # Functions
+    "single_step",
+    # Handler classes
+    "PyautoguiActionHandler",
+    "ScreenshotMaker",
+    # Response models
+    "LLMResponse",
+    "ErrorResponse",
+    "ErrorDetail",
+    # Exceptions
+    "OAGIError",
+    "APIError",
+    "AuthenticationError",
+    "ConfigurationError",
+    "NetworkError",
+    "NotFoundError",
+    "RateLimitError",
+    "ServerError",
+    "RequestTimeoutError",
+    "ValidationError",
+]

oagi/exceptions.py ADDED Viewed

@@ -0,0 +1,75 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import httpx
+class OAGIError(Exception):
+    """Root of the exception hierarchy defined in this module."""
+class APIError(OAGIError):
+    """Error reported by the API, optionally carrying HTTP context."""
+    def __init__(
+        self,
+        message: str,
+        code: str | None = None,
+        status_code: int | None = None,
+        response: httpx.Response | None = None,
+    ):
+        """Store the error details on the instance.
+        Args:
+            message: Human-readable description of the failure.
+            code: Machine-readable API error code, if any.
+            status_code: HTTP status code of the response, if any.
+            response: The httpx response that produced the error, if any.
+        """
+        super().__init__(message)
+        self.message, self.code = message, code
+        self.status_code, self.response = status_code, response
+    def __str__(self) -> str:
+        # The bracketed code is included only when a truthy code is present.
+        prefix = f"API Error [{self.code}]" if self.code else "API Error"
+        return f"{prefix}: {self.message}"
+class AuthenticationError(APIError):
+    """API error that SyncClient maps from HTTP 401 responses."""
+class RateLimitError(APIError):
+    """API error that SyncClient maps from HTTP 429 responses."""
+class ValidationError(APIError):
+    """API error that SyncClient maps from HTTP 422 responses."""
+class NotFoundError(APIError):
+    """API error that SyncClient maps from HTTP 404 responses."""
+class ServerError(APIError):
+    """API error that SyncClient maps from HTTP responses with status >= 500."""
+class NetworkError(OAGIError):
+    """Transport-level failure that keeps the triggering exception for inspection."""
+    def __init__(self, message: str, original_error: Exception | None = None):
+        """Record *message* and the optional exception that caused the failure."""
+        self.original_error = original_error
+        super().__init__(message)
+class RequestTimeoutError(NetworkError):
+    """Network error raised by SyncClient.create_message when the request times out."""
+class ConfigurationError(OAGIError):
+    """Raised by SyncClient when the base URL or API key is not configured."""

oagi/logging.py ADDED Viewed

@@ -0,0 +1,45 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import logging
+import os
+def get_logger(name: str) -> logging.Logger:
+    """Return the logger ``oagi.<name>`` and (re)configure the shared ``oagi`` logger.
+    The level of the ``oagi`` logger is read from the ``OAGI_LOG`` environment
+    variable on every call. The value is a case-insensitive level name such as
+    DEBUG, INFO, WARNING, ERROR or CRITICAL; a missing or unrecognised value
+    falls back to INFO. A stream handler is attached to the ``oagi`` logger
+    whenever it has no handlers yet.
+    Args:
+        name: Suffix appended to the ``oagi.`` namespace.
+    Returns:
+        The logger named ``oagi.<name>``.
+    """
+    logger = logging.getLogger(f"oagi.{name}")
+    oagi_root = logging.getLogger("oagi")
+    # Get log level from environment; tolerate stray whitespace around the value
+    log_level = os.getenv("OAGI_LOG", "INFO").strip().upper()
+    # A bare getattr() is not a sufficient check: the logging module also exposes
+    # upper-case attributes that are not levels (e.g. BASIC_FORMAT, a str), and
+    # setLevel() raises on those. Accept only integer level constants.
+    level = getattr(logging, log_level, None)
+    if not isinstance(level, int):
+        level = logging.INFO
+    # Configure root oagi logger once
+    if not oagi_root.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        handler.setFormatter(formatter)
+        oagi_root.addHandler(handler)
+    # Always update level in case environment variable changed
+    oagi_root.setLevel(level)
+    return logger

oagi/pyautogui_action_handler.py ADDED Viewed

@@ -0,0 +1,146 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import re
+import time
+import pyautogui
+from .types import Action, ActionType
+class PyautoguiActionHandler:
+    """
+    Executes `Action` objects on the local desktop through PyAutoGUI.
+    Instances are callable: pass a list of actions and each one is executed in
+    order. The constructor takes no arguments. Coordinates found in action
+    arguments are treated as values on a 0-1000 scale and are rescaled to the
+    screen size captured when the handler is constructed.
+    Methods:
+        __call__: Executes the provided list of actions.
+    """
+    def __init__(self):
+        """Capture the screen size and set PyAutoGUI's pause between calls."""
+        # Get screen dimensions for coordinate denormalization
+        self.screen_width, self.screen_height = pyautogui.size()
+        # Set default delay between actions
+        # NOTE: PAUSE is an attribute of the pyautogui module, not of this instance.
+        pyautogui.PAUSE = 0.1
+    def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
+        """Convert coordinates from 0-1000 range to actual screen coordinates."""
+        # int() truncates toward zero, so results are rounded down for positive input.
+        screen_x = int(x * self.screen_width / 1000)
+        screen_y = int(y * self.screen_height / 1000)
+        return screen_x, screen_y
+    def _parse_coords(self, args_str: str) -> tuple[int, int]:
+        """Extract x, y coordinates from argument string.
+        Returns the coordinates already converted to screen pixels.
+        Raises:
+            ValueError: If the string does not start with two comma-separated
+                unsigned integers.
+        """
+        # re.match anchors only at the start; trailing text after the pair is ignored.
+        match = re.match(r"(\d+),\s*(\d+)", args_str)
+        if not match:
+            raise ValueError(f"Invalid coordinates format: {args_str}")
+        x, y = int(match.group(1)), int(match.group(2))
+        return self._denormalize_coords(x, y)
+    def _parse_drag_coords(self, args_str: str) -> tuple[int, int, int, int]:
+        """Extract x1, y1, x2, y2 coordinates from drag argument string.
+        Returns both points converted to screen pixels.
+        Raises:
+            ValueError: If the string does not start with four comma-separated
+                unsigned integers.
+        """
+        match = re.match(r"(\d+),\s*(\d+),\s*(\d+),\s*(\d+)", args_str)
+        if not match:
+            raise ValueError(f"Invalid drag coordinates format: {args_str}")
+        x1, y1, x2, y2 = (
+            int(match.group(1)),
+            int(match.group(2)),
+            int(match.group(3)),
+            int(match.group(4)),
+        )
+        x1, y1 = self._denormalize_coords(x1, y1)
+        x2, y2 = self._denormalize_coords(x2, y2)
+        return x1, y1, x2, y2
+    def _parse_scroll(self, args_str: str) -> tuple[int, int, str]:
+        """Extract x, y, direction from scroll argument string.
+        Returns the position in screen pixels and the lower-cased direction word.
+        Raises:
+            ValueError: If the string does not match "<int>, <int>, <word>".
+        """
+        match = re.match(r"(\d+),\s*(\d+),\s*(\w+)", args_str)
+        if not match:
+            raise ValueError(f"Invalid scroll format: {args_str}")
+        x, y = int(match.group(1)), int(match.group(2))
+        x, y = self._denormalize_coords(x, y)
+        direction = match.group(3).lower()
+        return x, y, direction
+    def _parse_hotkey(self, args_str: str) -> list[str]:
+        """Parse hotkey string into list of keys."""
+        # Remove parentheses if present
+        args_str = args_str.strip("()")
+        # Split by '+' to get individual keys
+        keys = [key.strip() for key in args_str.split("+")]
+        return keys
+    def _execute_action(self, action: Action) -> None:
+        """Execute a single action, repeating it `action.count` times.
+        A falsy count (None or 0) is treated as 1. The argument string is parsed
+        again on every repetition.
+        """
+        count = action.count or 1
+        arg = action.argument.strip("()")  # Remove outer parentheses if present
+        for _ in range(count):
+            match action.type:
+                case ActionType.CLICK:
+                    x, y = self._parse_coords(arg)
+                    pyautogui.click(x, y)
+                case ActionType.LEFT_DOUBLE:
+                    x, y = self._parse_coords(arg)
+                    pyautogui.doubleClick(x, y)
+                case ActionType.RIGHT_SINGLE:
+                    x, y = self._parse_coords(arg)
+                    pyautogui.rightClick(x, y)
+                case ActionType.DRAG:
+                    # Move to the start point first, then drag with the left button.
+                    x1, y1, x2, y2 = self._parse_drag_coords(arg)
+                    pyautogui.moveTo(x1, y1)
+                    pyautogui.dragTo(x2, y2, duration=0.5, button="left")
+                case ActionType.HOTKEY:
+                    keys = self._parse_hotkey(arg)
+                    pyautogui.hotkey(*keys)
+                case ActionType.TYPE:
+                    # Remove quotes if present
+                    # strip() removes every leading/trailing quote character, not just one pair.
+                    text = arg.strip("\"'")
+                    pyautogui.typewrite(text)
+                case ActionType.SCROLL:
+                    x, y, direction = self._parse_scroll(arg)
+                    pyautogui.moveTo(x, y)
+                    # Any direction other than "up" results in the negative scroll amount.
+                    scroll_amount = 5 if direction == "up" else -5
+                    pyautogui.scroll(scroll_amount)
+                case ActionType.FINISH:
+                    # Task completion - no action needed
+                    pass
+                case ActionType.WAIT:
+                    # Wait for a short period
+                    time.sleep(1)
+                case ActionType.CALL_USER:
+                    # Call user - implementation depends on requirements
+                    print("User intervention requested")
+                case _:
+                    # Unrecognised types are reported on stdout and otherwise ignored.
+                    print(f"Unknown action type: {action.type}")
+    def __call__(self, actions: list[Action]) -> None:
+        """Execute the provided list of actions.
+        Execution stops at the first action that raises: the error is printed
+        and re-raised, so later actions in the list are not run.
+        """
+        for action in actions:
+            try:
+                self._execute_action(action)
+            except Exception as e:
+                print(f"Error executing action {action.type}: {e}")
+                raise

oagi/screenshot_maker.py ADDED Viewed

@@ -0,0 +1,73 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import io
+from typing import Optional
+import pyautogui
+from .types import Image
+class FileImage:
+    """Image whose bytes are loaded from a file when the object is created."""
+    def __init__(self, path: str):
+        """Read the whole file at *path* into memory."""
+        with open(path, "rb") as source:
+            payload = source.read()
+        self.path = path
+        self.data = payload
+    def read(self) -> bytes:
+        """Return the bytes that were loaded at construction time."""
+        return self.data
+class MockImage:
+    """Stand-in image that always yields the same placeholder bytes."""
+    def read(self) -> bytes:
+        """Return the fixed placeholder payload."""
+        placeholder = b"mock screenshot data"
+        return placeholder
+class ScreenshotImage:
+    """Wraps a screenshot object and exposes it as PNG bytes."""
+    def __init__(self, screenshot):
+        """Keep a reference to *screenshot*; encoding is deferred until read()."""
+        self.screenshot = screenshot
+        self._cached_bytes: Optional[bytes] = None
+    def read(self) -> bytes:
+        """Return the screenshot encoded as PNG, encoding it only on first use."""
+        if self._cached_bytes is not None:
+            return self._cached_bytes
+        # First call: serialise into an in-memory buffer and remember the result.
+        sink = io.BytesIO()
+        self.screenshot.save(sink, format="PNG")
+        self._cached_bytes = sink.getvalue()
+        return self._cached_bytes
+class ScreenshotMaker:
+    """Callable that captures the screen with pyautogui and remembers the last capture."""
+    def __init__(self):
+        """Start with no remembered screenshot."""
+        self._last_screenshot: Optional[ScreenshotImage] = None
+    def __call__(self) -> Image:
+        """Capture the screen now, remember the capture, and return it."""
+        self._last_screenshot = ScreenshotImage(pyautogui.screenshot())
+        return self._last_screenshot
+    def last_image(self) -> Image:
+        """Return the remembered capture, taking a fresh one if there is none yet."""
+        remembered = self._last_screenshot
+        return remembered if remembered is not None else self()

oagi/short_task.py ADDED Viewed

@@ -0,0 +1,44 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .logging import get_logger
+from .task import Task
+from .types import ActionHandler, ImageProvider
+logger = get_logger("short_task")
+class ShortTask(Task):
+    """Task implementation with automatic mode for short-duration tasks."""
+    def auto_mode(
+        self,
+        task_desc: str,
+        max_steps: int = 5,
+        executor: ActionHandler | None = None,
+        image_provider: ImageProvider | None = None,
+    ) -> bool:
+        """Run the task automatically until it completes or max_steps is reached.
+        Each iteration asks image_provider for an image, sends it through
+        step(), and hands the returned actions to executor when one is given.
+        Args:
+            task_desc: Description passed to init_task().
+            max_steps: Upper bound on the number of step() calls.
+            executor: Optional callable that performs the returned actions;
+                when omitted the actions are requested but not executed.
+            image_provider: Callable returning the image for each step. Required
+                despite the None default.
+        Returns:
+            True if a step reported completion, False if max_steps was exhausted.
+        Raises:
+            TypeError: If image_provider is None.
+        """
+        # Fail before init_task() contacts the API: the loop below calls
+        # image_provider unconditionally, so None could never have worked.
+        if image_provider is None:
+            raise TypeError("auto_mode() requires an image_provider callable")
+        logger.info(
+            f"Starting auto mode for task: '{task_desc}' (max_steps: {max_steps})"
+        )
+        self.init_task(task_desc, max_steps=max_steps)
+        for i in range(max_steps):
+            logger.debug(f"Auto mode step {i + 1}/{max_steps}")
+            image = image_provider()
+            step = self.step(image)
+            if step.stop:
+                logger.info(f"Auto mode completed successfully after {i + 1} steps")
+                return True
+            if executor:
+                logger.debug(f"Executing {len(step.actions)} actions")
+                executor(step.actions)
+        logger.warning(f"Auto mode reached max steps ({max_steps}) without completion")
+        return False

oagi/single_step.py ADDED Viewed

@@ -0,0 +1,82 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from pathlib import Path
+from .task import Task
+from .types import Image, Step
+def single_step(
+    task_description: str,
+    screenshot: str | bytes | Path | Image,
+    instruction: str | None = None,
+    api_key: str | None = None,
+    base_url: str | None = None,
+) -> Step:
+    """
+    Run one inference step in a throwaway Task.
+    A Task is created for this call only, initialised with the description,
+    asked for one step, and closed again, so no state is carried between calls.
+    Args:
+        task_description: Description of the task to perform
+        screenshot: A file path (str or Path), raw image bytes, or an object
+            with a read() method returning bytes
+        instruction: Optional additional instruction forwarded to the step
+        api_key: OAGI API key (uses environment variable if not provided)
+        base_url: OAGI base URL (uses environment variable if not provided)
+    Returns:
+        Step: The result of Task.step() for the screenshot
+    Raises:
+        FileNotFoundError: If a path is given and nothing exists at it
+        ValueError: If screenshot is none of the supported kinds
+    Example:
+        >>> # From raw bytes
+        >>> with open("screenshot.png", "rb") as f:
+        ...     image_bytes = f.read()
+        >>> step = single_step(
+        ...     task_description="Click the submit button",
+        ...     screenshot=image_bytes
+        ... )
+        >>> # From a file path
+        >>> step = single_step(
+        ...     task_description="Fill in the form",
+        ...     screenshot=Path("screenshot.png"),
+        ...     instruction="Use test@example.com for email"
+        ... )
+        >>> # From an object implementing read()
+        >>> from oagi.screenshot_maker import FileImage
+        >>> image = FileImage("screenshot.png")
+        >>> step = single_step(
+        ...     task_description="Navigate to settings",
+        ...     screenshot=image
+        ... )
+    """
+    # Normalise every accepted input kind to raw bytes.
+    if isinstance(screenshot, (str, Path)):
+        path = Path(screenshot)
+        if not path.exists():
+            raise FileNotFoundError(f"Screenshot file not found: {path}")
+        screenshot_bytes = path.read_bytes()
+    elif isinstance(screenshot, bytes):
+        screenshot_bytes = screenshot
+    elif isinstance(screenshot, Image):
+        screenshot_bytes = screenshot.read()
+    else:
+        raise ValueError(
+            f"screenshot must be Image, bytes, str, or Path, got {type(screenshot)}"
+        )
+    # The context manager closes the task's HTTP client once the step returns.
+    with Task(api_key=api_key, base_url=base_url) as one_shot:
+        one_shot.init_task(task_description)
+        return one_shot.step(screenshot_bytes, instruction=instruction)

oagi/sync_client.py ADDED Viewed

@@ -0,0 +1,265 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import base64
+import os
+import httpx
+from pydantic import BaseModel
+from .exceptions import (
+    APIError,
+    AuthenticationError,
+    ConfigurationError,
+    NetworkError,
+    NotFoundError,
+    RateLimitError,
+    RequestTimeoutError,
+    ServerError,
+    ValidationError,
+)
+from .logging import get_logger
+from .types import Action
+logger = get_logger("sync_client")
+class Usage(BaseModel):
+    # Model for the `usage` field of LLMResponse; all three counters are required.
+    # Presumably token counts reported by the API for one request (verify against the API spec).
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+class ErrorDetail(BaseModel):
+    """Detailed error information."""
+    # Copied into the `code` attribute of the exception raised by SyncClient.create_message.
+    code: str
+    # Used as the message of that exception.
+    message: str
+class ErrorResponse(BaseModel):
+    """Standard error response format.
+    SyncClient.create_message parses non-200 response bodies with this model.
+    """
+    # Explicit default: without it pydantic v2 treats the field as required, so an
+    # error body lacking an "error" key would fail validation instead of being
+    # reported as an error response without details.
+    error: ErrorDetail | None = None
+class LLMResponse(BaseModel):
+    # Model that SyncClient.create_message builds from the JSON body of a 200 response.
+    id: str
+    # Task.step() stores this value and sends it back on the following request.
+    task_id: str
+    object: str = "task.completion"
+    created: int
+    model: str
+    task_description: str
+    current_step: int
+    # Task.step() exposes this as Step.stop.
+    is_complete: bool
+    actions: list[Action]
+    reason: str | None = None
+    usage: Usage
+    # When set, create_message raises APIError even though the HTTP status was 200.
+    error: ErrorDetail | None = None
+class SyncClient:
+    """Blocking client for the OAGI HTTP API, backed by one ``httpx.Client``.
+    Configuration comes from the constructor arguments, falling back to the
+    ``OAGI_BASE_URL`` and ``OAGI_API_KEY`` environment variables. The client can
+    be used as a context manager; leaving the ``with`` block closes it.
+    """
+    def __init__(self, base_url: str | None = None, api_key: str | None = None):
+        """Resolve configuration and create the underlying HTTP client.
+        Args:
+            base_url: API root URL; defaults to ``OAGI_BASE_URL``.
+            api_key: API key; defaults to ``OAGI_API_KEY``.
+        Raises:
+            ConfigurationError: If either value is still missing after the
+                environment fallback.
+        """
+        # Get from environment if not provided
+        self.base_url = base_url or os.getenv("OAGI_BASE_URL")
+        self.api_key = api_key or os.getenv("OAGI_API_KEY")
+        # Validate required configuration
+        if not self.base_url:
+            raise ConfigurationError(
+                "OAGI base URL must be provided either as 'base_url' parameter or "
+                "OAGI_BASE_URL environment variable"
+            )
+        if not self.api_key:
+            raise ConfigurationError(
+                "OAGI API key must be provided either as 'api_key' parameter or "
+                "OAGI_API_KEY environment variable"
+            )
+        self.base_url = self.base_url.rstrip("/")
+        self.client = httpx.Client(base_url=self.base_url)
+        # Timeout in seconds applied to create_message() requests.
+        self.timeout = 60
+        logger.info(f"SyncClient initialized with base_url: {self.base_url}")
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Delegate so that close() stays the single place that releases resources.
+        self.close()
+    def close(self):
+        """Close the underlying httpx client"""
+        self.client.close()
+    def create_message(
+        self,
+        model: str,
+        screenshot: str,  # base64 encoded
+        task_description: str | None = None,
+        task_id: str | None = None,
+        instruction: str | None = None,
+        max_actions: int | None = 5,
+        api_version: str | None = None,
+    ) -> LLMResponse:
+        """
+        Call the /v1/message endpoint to analyze task and screenshot
+        Args:
+            model: The model to use for task analysis
+            screenshot: Base64-encoded screenshot image
+            task_description: Description of the task (required for new sessions)
+            task_id: Task ID for continuing existing task
+            instruction: Additional instruction when continuing a session (only works with task_id)
+            max_actions: Maximum number of actions to return (1-20)
+            api_version: API version header
+        Returns:
+            LLMResponse: The response from the API
+        Raises:
+            RequestTimeoutError: If the request times out
+            NetworkError: If httpx reports a network error
+            APIError: If the body is not JSON, the status is not 200, or a 200
+                response carries an error. Non-200 statuses use the subclass
+                chosen by _get_exception_class (AuthenticationError,
+                NotFoundError, ValidationError, RateLimitError, ServerError).
+        """
+        headers = {}
+        if api_version:
+            headers["x-api-version"] = api_version
+        if self.api_key:
+            headers["x-api-key"] = self.api_key
+        # Optional fields are sent only when the caller supplied them.
+        payload = {"model": model, "screenshot": screenshot}
+        if task_description is not None:
+            payload["task_description"] = task_description
+        if task_id is not None:
+            payload["task_id"] = task_id
+        if instruction is not None:
+            payload["instruction"] = instruction
+        if max_actions is not None:
+            payload["max_actions"] = max_actions
+        logger.info(f"Making API request to /v1/message with model: {model}")
+        logger.debug(
+            f"Request includes task_description: {task_description is not None}, task_id: {task_id is not None}"
+        )
+        try:
+            response = self.client.post(
+                "/v1/message", json=payload, headers=headers, timeout=self.timeout
+            )
+        except httpx.TimeoutException as e:
+            logger.error(f"Request timed out after {self.timeout} seconds")
+            raise RequestTimeoutError(
+                f"Request timed out after {self.timeout} seconds", e
+            ) from e
+        except httpx.NetworkError as e:
+            logger.error(f"Network error: {e}")
+            raise NetworkError(f"Network error: {e}", e) from e
+        try:
+            response_data = response.json()
+        except ValueError as e:
+            # If response is not JSON, raise API error
+            logger.error(f"Non-JSON API response: {response.status_code}")
+            raise APIError(
+                f"Invalid response format (status {response.status_code})",
+                status_code=response.status_code,
+                response=response,
+            ) from e
+        # Any non-200 status is an error response
+        if response.status_code != 200:
+            self._raise_for_error_response(response, response_data)
+        # Parse successful response
+        result = LLMResponse(**response_data)
+        # Check if the response contains an error (even with 200 status)
+        if result.error:
+            logger.error(
+                f"API Error in response: [{result.error.code}]: {result.error.message}"
+            )
+            raise APIError(
+                result.error.message,
+                code=result.error.code,
+                status_code=200,
+                response=response,
+            )
+        logger.info(
+            f"API request successful - task_id: {result.task_id}, step: {result.current_step}, complete: {result.is_complete}"
+        )
+        logger.debug(f"Response included {len(result.actions)} actions")
+        return result
+    def _raise_for_error_response(self, response: httpx.Response, response_data) -> None:
+        """Raise the exception matching a non-200 response; never returns normally."""
+        # Map to specific exception types based on status code
+        exception_class = self._get_exception_class(response.status_code)
+        error_detail = None
+        if isinstance(response_data, dict):
+            try:
+                error_detail = ErrorResponse(**response_data).error
+            except (TypeError, ValueError):
+                # The body is JSON but not in the expected error shape. Fall
+                # through to the generic message rather than leaking a model
+                # validation error to the caller.
+                error_detail = None
+        if error_detail is not None:
+            logger.error(f"API Error [{error_detail.code}]: {error_detail.message}")
+            raise exception_class(
+                error_detail.message,
+                code=error_detail.code,
+                status_code=response.status_code,
+                response=response,
+            )
+        # Error response without error details
+        logger.error(f"API error response without details: {response.status_code}")
+        raise exception_class(
+            f"API error (status {response.status_code})",
+            status_code=response.status_code,
+            response=response,
+        )
+    def _get_exception_class(self, status_code: int) -> type[APIError]:
+        """Get the appropriate exception class based on status code."""
+        status_map = {
+            401: AuthenticationError,
+            404: NotFoundError,
+            422: ValidationError,
+            429: RateLimitError,
+        }
+        # Every 5xx status maps to ServerError; unmapped statuses use plain APIError.
+        if status_code >= 500:
+            return ServerError
+        return status_map.get(status_code, APIError)
+    def health_check(self) -> dict:
+        """
+        Call the /health endpoint for health check
+        Returns:
+            dict: Health check response
+        Raises:
+            httpx.HTTPStatusError: If the endpoint answers with an error status
+        """
+        logger.debug("Making health check request")
+        try:
+            response = self.client.get("/health")
+            response.raise_for_status()
+            result = response.json()
+            logger.debug("Health check successful")
+            return result
+        except httpx.HTTPStatusError as e:
+            logger.warning(f"Health check failed: {e}")
+            raise
+def encode_screenshot_from_bytes(image_bytes: bytes) -> str:
+    """Return *image_bytes* as a base64 text string."""
+    encoded = base64.b64encode(image_bytes)
+    return encoded.decode("utf-8")
+def encode_screenshot_from_file(image_path: str) -> str:
+    """Read the file at *image_path* and return its contents base64-encoded."""
+    with open(image_path, "rb") as image_file:
+        raw = image_file.read()
+    return encode_screenshot_from_bytes(raw)

oagi/task.py ADDED Viewed

@@ -0,0 +1,109 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .logging import get_logger
+from .sync_client import SyncClient, encode_screenshot_from_bytes
+from .types import Image, Step
+logger = get_logger("task")
+class Task:
+    """Base class for task automation with the OAGI API.
+    Owns a SyncClient and tracks the current task id and description. Usable
+    as a context manager; leaving the block closes the client.
+    """
+    def __init__(self, api_key: str | None = None, base_url: str | None = None):
+        """Create the client; SyncClient resolves missing values from the environment."""
+        self.client = SyncClient(base_url=base_url, api_key=api_key)
+        # Mirror the values the client actually resolved.
+        self.api_key = self.client.api_key
+        self.base_url = self.client.base_url
+        self.task_id: str | None = None
+        self.task_description: str | None = None
+        self.model = "vision-model-v1"  # default model
+    def init_task(self, task_desc: str, max_steps: int = 5):
+        """Initialize a new task with the given description.
+        Sends a request with an empty screenshot and no task id, then stores
+        the task id from the response. max_steps is only written to the log
+        here; it is not part of the request.
+        """
+        self.task_description = task_desc
+        response = self.client.create_message(
+            model=self.model,
+            screenshot="",
+            task_description=self.task_description,
+            task_id=None,
+        )
+        self.task_id = response.task_id  # Remember the id returned for the new task
+        logger.info(f"Task initialized: '{task_desc}' (max_steps: {max_steps})")
+    def step(self, screenshot: Image | bytes, instruction: str | None = None) -> Step:
+        """Send screenshot to the server and get the next actions.
+        Args:
+            screenshot: Screenshot as Image object or raw bytes
+            instruction: Optional additional instruction for this step (only works with existing task_id)
+        Returns:
+            Step: The actions and reasoning for this step
+        Raises:
+            ValueError: If no task description has been set yet.
+        """
+        if not self.task_description:
+            raise ValueError("Task description must be set. Call init_task() first.")
+        logger.debug(f"Executing step for task: '{self.task_description}'")
+        try:
+            # Convert Image to bytes using the protocol
+            if isinstance(screenshot, Image):
+                screenshot_bytes = screenshot.read()
+            else:
+                screenshot_bytes = screenshot
+            screenshot_b64 = encode_screenshot_from_bytes(screenshot_bytes)
+            # Call API
+            response = self.client.create_message(
+                model=self.model,
+                screenshot=screenshot_b64,
+                task_description=self.task_description,
+                task_id=self.task_id,
+                instruction=instruction,
+            )
+            # Update task_id from response
+            if self.task_id != response.task_id:
+                if self.task_id is None:
+                    logger.debug(f"Task ID assigned: {response.task_id}")
+                else:
+                    logger.debug(
+                        f"Task ID changed: {self.task_id} -> {response.task_id}"
+                    )
+                self.task_id = response.task_id
+            # Convert API response to Step
+            result = Step(
+                reason=response.reason,
+                actions=response.actions,
+                stop=response.is_complete,
+            )
+            if response.is_complete:
+                logger.info(f"Task completed after {response.current_step} steps")
+            else:
+                logger.debug(
+                    f"Step {response.current_step} completed with {len(response.actions)} actions"
+                )
+            return result
+        except Exception as e:
+            # Log for diagnostics, then let the original exception propagate unchanged.
+            logger.error(f"Error during step execution: {e}")
+            raise
+    def close(self):
+        """Close the underlying HTTP client to free resources."""
+        self.client.close()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()

oagi/types/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .action_handler import ActionHandler
+from .image import Image
+from .image_provider import ImageProvider
+from .models import Action, ActionType, Step
+# Names re-exported from the submodules imported above.
+__all__ = ["Action", "ActionType", "Image", "Step", "ActionHandler", "ImageProvider"]

oagi/types/action_handler.py ADDED Viewed

@@ -0,0 +1,30 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol
+from .models import Action
+class ActionHandler(Protocol):
+    """Structural type for callables that execute a batch of actions.
+    Any object whose ``__call__`` accepts a ``list[Action]`` and returns ``None``
+    matches this protocol; inheriting from it is not required.
+    """
+    def __call__(self, actions: list[Action]) -> None:
+        """
+        Executes a list of actions.
+        This method takes a list of `Action` objects and executes them. It does
+        not return any value; its effect is whatever the implementation does
+        in response to the input actions.
+        Parameters:
+            actions (list[Action]): A list of `Action` objects to be executed. Each
+            `Action` describes what should be applied during the call.
+        Raises:
+            RuntimeError: If an error occurs during the execution of the actions.
+        """

oagi/types/image.py ADDED Viewed

@@ -0,0 +1,17 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol, runtime_checkable
+@runtime_checkable
+class Image(Protocol):
+    """Protocol for image objects that can be read as bytes."""
+    # @runtime_checkable permits isinstance(obj, Image); that check only verifies
+    # that a `read` attribute exists, not its signature or return type.
+    def read(self) -> bytes:
+        """Read the image data as bytes."""

oagi/types/image_provider.py ADDED Viewed

@@ -0,0 +1,34 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol
+from .image import Image
+class ImageProvider(Protocol):
+    """Structural type for objects that supply `Image` values.
+    An implementation must be callable with no arguments and must also expose
+    a `last_image()` method.
+    """
+    def __call__(self) -> Image:
+        """
+        Invokes the provider and returns the resulting Image. How the image is
+        obtained is left entirely to the implementation's __call__ logic.
+        Returns:
+            Image: The resulting image output from the callable logic.
+        """
+    # NOTE(review): the return annotation is `Image`, yet the docstring below says
+    # None may be returned; confirm which is intended (`Image | None` would match
+    # the docstring).
+    def last_image(self) -> Image:
+        """
+        Returns the last captured image.
+        This method retrieves the most recent image that was captured and stored
+        in memory. If there are no images available, the method may return None.
+        Returns:
+            Image: The last captured image, or None if no images are available.
+        """

oagi/types/models/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from .action import Action, ActionType
+from .step import Step
+__all__ = ["Action", "ActionType", "Step"]

oagi/types/models/action.py ADDED Viewed

@@ -0,0 +1,32 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from enum import Enum
+from pydantic import BaseModel, Field
+class ActionType(str, Enum):
+    """Closed set of action kinds, each backed by a lowercase string value.
+    The `str` mixin makes every member a string as well, so for example
+    ``ActionType.CLICK == "click"`` holds.
+    """
+    CLICK = "click"
+    LEFT_DOUBLE = "left_double"
+    RIGHT_SINGLE = "right_single"
+    DRAG = "drag"
+    HOTKEY = "hotkey"
+    TYPE = "type"
+    SCROLL = "scroll"
+    FINISH = "finish"
+    WAIT = "wait"
+    CALL_USER = "call_user"
+class Action(BaseModel):
+    """A single action: its kind, a string argument, and a repeat count."""
+    # Required; must be one of the ActionType members.
+    type: ActionType = Field(..., description="Type of action to perform")
+    # Required string. NOTE(review): the expected format is not defined in this
+    # module; presumably it depends on `type` (confirm against the handler).
+    argument: str = Field(..., description="Action argument in the specified format")
+    # Defaults to 1 and is constrained to >= 1; the annotation also admits None.
+    count: int | None = Field(
+        default=1, ge=1, description="Number of times to repeat the action"
+    )

oagi/types/models/step.py ADDED Viewed

@@ -0,0 +1,17 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from pydantic import BaseModel
+from .action import Action
+class Step(BaseModel):
+    """Result record bundling an optional reason, a list of actions, and a stop flag."""
+    # Optional text; defaults to None when omitted.
+    reason: str | None = None
+    # Required (no default): the Action objects belonging to this step.
+    actions: list[Action]
+    # Defaults to False. NOTE(review): presumably True means no further steps are
+    # needed; confirm against the code that constructs Step.
+    stop: bool = False

{oagi-0.0.0.dist-info → oagi-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,31 +1,55 @@
-Metadata-Version: 2.4
-Name: oagi
-Version: 0.0.0
-Summary: Official API of OpenAGI Foundation
-Author-email: OpenAGI Foundation <contact@agiopen.org>
-License: MIT License
-        Copyright (c) 2025 OpenAGI Foundation
-        Permission is hereby granted, free of charge, to any person obtaining a copy
-        of this software and associated documentation files (the "Software"), to deal
-        in the Software without restriction, including without limitation the rights
-        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-        copies of the Software, and to permit persons to whom the Software is
-        furnished to do so, subject to the following conditions:
-        The above copyright notice and this permission notice shall be included in all
-        copies or substantial portions of the Software.
-        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-        SOFTWARE.
-Project-URL: Homepage, https://github.com/agiopen-org/oagi
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Dynamic: license-file
+Metadata-Version: 2.3
+Name: oagi
+Version: 0.2.0
+Summary: Official API of OpenAGI Foundation
+Project-URL: Homepage, https://github.com/agiopen-org/oagi
+Author-email: OpenAGI Foundation <contact@agiopen.org>
+License: MIT License
+        Copyright (c) 2025 OpenAGI Foundation
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Requires-Python: >=3.10
+Requires-Dist: httpx>=0.28.0
+Requires-Dist: pillow>=11.3.0
+Requires-Dist: pyautogui>=0.9.54
+Requires-Dist: pydantic>=2.0.0
+Description-Content-Type: text/markdown
+# OAGI Python SDK
+## Basic Usage
+```bash
+pip install oagi # python >= 3.10
+```
+```bash
+export OAGI_BASE_URL=""
+export OAGI_API_KEY="sk-xxxx"
+```
+```python
+from oagi import PyautoguiActionHandler, ScreenshotMaker, ShortTask
+short_task = ShortTask()
+is_completed = short_task.auto_mode(
+    "Search weather with Google",
+    max_steps=5,
+    executor=PyautoguiActionHandler(),
+    image_provider=(sm := ScreenshotMaker()),
+)
+```

oagi-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,20 @@
+oagi/__init__.py,sha256=ms9ahLdHNrMWtiqX93q8Iv55ag__tO4Id0DQ3hA2TVM,1347
+oagi/exceptions.py,sha256=VMwVS8ouE9nHhBpN3AZMYt5_U2kGcihWaTnBhoQLquo,1662
+oagi/logging.py,sha256=CWe89mA5MKTipIvfrqSYkv2CAFNBSwHMDQMDkG_g64g,1350
+oagi/pyautogui_action_handler.py,sha256=LBWmtqkXzZSJo07s3uOw-NWUE9rZZtbNAx0YI83pCbk,5482
+oagi/screenshot_maker.py,sha256=lyJSMFagHeaqg59CQGMTqLvSzQN_pBbhbV2oIFG46vA,2077
+oagi/short_task.py,sha256=ofcMi7vbu9W1MCSGOk_FNEHJcB02pfgNcx1-Y8UkpJY,1552
+oagi/single_step.py,sha256=JEsF7ABa4wwW5Pi5AfjeKzyuKhC4kC4fcotnmnNye5o,2874
+oagi/sync_client.py,sha256=XIuqAHD56BkBz5v4HshgWpjmfzs_z7TQbjDEzIcnKJA,8678
+oagi/task.py,sha256=NmpNMu8CJll50zGsGtVie1kdpKeWnAAWudEa-aasBbU,3959
+oagi/types/__init__.py,sha256=eh-1IEqMTY2hUrvQJeTg6vsvlE6F4Iz5C0_K86AnWn8,549
+oagi/types/action_handler.py,sha256=NH8E-m5qpGqWcXzTSWfF7W0Xdp8SkzJsbhCmQ0B96cg,1075
+oagi/types/image.py,sha256=KgPCCTJ6D5vHIaGZdbTE7eQEa1WlT6G9tf59ZuUCV2U,537
+oagi/types/image_provider.py,sha256=oYFdOYznrK_VOR9egzOjw5wFM5w8EY2sY01pH0ANAgU,1112
+oagi/types/models/__init__.py,sha256=4qhKxWXsXEVzD6U_RM6PXR45os765qigtZs1BsS4WHg,414
+oagi/types/models/action.py,sha256=8Xd3IcH32ENq7uXczo-mbQ736yUOGxO_TaZTfHVRY7w,935
+oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
+oagi-0.2.0.dist-info/METADATA,sha256=nk3a9mv2DvZ1LKEpV-1nHH1TEbrqKv8OALN_8LJ47tQ,2066
+oagi-0.2.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+oagi-0.2.0.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
+oagi-0.2.0.dist-info/RECORD,,

{oagi-0.0.0.dist-info → oagi-0.2.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: hatchling 1.26.3
 Root-Is-Purelib: true
 Tag: py3-none-any

{oagi-0.0.0.dist-info → oagi-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

@@ -1,21 +1,21 @@
-MIT License
-Copyright (c) 2025 OpenAGI Foundation
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+MIT License
+Copyright (c) 2025 OpenAGI Foundation
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

oagi/core.py DELETED Viewed

	@@ -1,2 +0,0 @@
1	- def hello():
2	- return "Hello from OpenAGI Foundation!"

oagi-0.0.0.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-oagi/__init__.py,sha256=K1GAo2fdj3I1a2lP7RIu-ViQ52DJ84qV5imkQNkF0JE,25
-oagi/core.py,sha256=eU5aYaEmmgcUSmiPnRB9njRUI4Xrij2HcPunh67T86M,57
-oagi-0.0.0.dist-info/licenses/LICENSE,sha256=xHvNtuFT_mr6qQ1vGCphFj9r4Jc6h4VJLXTVYkFzgWM,1096
-oagi-0.0.0.dist-info/METADATA,sha256=h-xCKLcBqz9MiIIUELPUoK1Hz60Snvb8UXdMlCsGUDQ,1574
-oagi-0.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-oagi-0.0.0.dist-info/top_level.txt,sha256=t4TE_HUmY4z48HHEpgpmRYWnHdmXKzSdjzLxsx7Gkd0,5
-oagi-0.0.0.dist-info/RECORD,,

oagi-0.0.0.dist-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- oagi

oagi 0.0.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

oagi 0.0.0py3-none-any.whl → 0.2.0py3-none-any.whl