PyPI - oagi - Versions diffs - 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

oagi-0.2.1-py3-none-any.whl → oagi-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of oagi might be problematic. Click here for more details.

Files changed (24)

oagi/__init__.py +30 -2
oagi/async_client.py +239 -0
oagi/async_pyautogui_action_handler.py +44 -0
oagi/async_screenshot_maker.py +47 -0
oagi/async_short_task.py +56 -0
oagi/async_single_step.py +83 -0
oagi/async_task.py +117 -0
oagi/pil_image.py +98 -0
oagi/pyautogui_action_handler.py +82 -51
oagi/screenshot_maker.py +16 -48
oagi/short_task.py +8 -0
oagi/single_step.py +4 -3
oagi/task.py +7 -2
oagi/types/__init__.py +14 -2
oagi/types/async_action_handler.py +30 -0
oagi/types/async_image_provider.py +37 -0
oagi/types/models/__init__.py +2 -1
oagi/types/models/image_config.py +47 -0
oagi-0.4.0.dist-info/METADATA +161 -0
oagi-0.4.0.dist-info/RECORD +30 -0
oagi-0.2.1.dist-info/METADATA +0 -55
oagi-0.2.1.dist-info/RECORD +0 -20
{oagi-0.2.1.dist-info → oagi-0.4.0.dist-info}/WHEEL +0 -0
{oagi-0.2.1.dist-info → oagi-0.4.0.dist-info}/licenses/LICENSE +0 -0

oagi/pil_image.py ADDED Viewed

@@ -0,0 +1,98 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+import io
+from typing import Optional
+import pyautogui
+from PIL import Image as PILImageLib
+from .types.models.image_config import ImageConfig
+class PILImage:
+    """PIL image wrapper with transformation capabilities."""
+    def __init__(self, image: PILImageLib.Image, config: ImageConfig | None = None):
+        """Initialize with a PIL image and optional config."""
+        self.image = image
+        self.config = config or ImageConfig()
+        self._cached_bytes: Optional[bytes] = None
+    @classmethod
+    def from_file(cls, path: str, config: ImageConfig | None = None) -> "PILImage":
+        """Create PILImage from file path."""
+        image = PILImageLib.open(path)
+        return cls(image, config)
+    @classmethod
+    def from_bytes(cls, data: bytes, config: ImageConfig | None = None) -> "PILImage":
+        """Create PILImage from raw bytes."""
+        image = PILImageLib.open(io.BytesIO(data))
+        return cls(image, config)
+    @classmethod
+    def from_screenshot(cls, config: ImageConfig | None = None) -> "PILImage":
+        """Create PILImage from screenshot."""
+        screenshot = pyautogui.screenshot()
+        return cls(screenshot, config)
+    def transform(self, config: ImageConfig) -> "PILImage":
+        """Apply transformations (resize) based on config and return new PILImage."""
+        # Apply resize if needed
+        transformed = self._resize(self.image, config)
+        # Return new PILImage with the config (format conversion happens on read())
+        return PILImage(transformed, config)
+    def _resize(
+        self, image: PILImageLib.Image, config: ImageConfig
+    ) -> PILImageLib.Image:
+        """Resize image based on config."""
+        if config.width or config.height:
+            # Get target dimensions (use original if not specified)
+            target_width = config.width or image.width
+            target_height = config.height or image.height
+            # Map resample string to PIL constant
+            resample_map = {
+                "NEAREST": PILImageLib.NEAREST,
+                "BILINEAR": PILImageLib.BILINEAR,
+                "BICUBIC": PILImageLib.BICUBIC,
+                "LANCZOS": PILImageLib.LANCZOS,
+            }
+            resample = resample_map[config.resample]
+            # Resize to exact dimensions
+            return image.resize((target_width, target_height), resample)
+        return image
+    def _convert_format(self, image: PILImageLib.Image) -> bytes:
+        """Convert image to configured format (PNG or JPEG)."""
+        buffer = io.BytesIO()
+        save_kwargs = {"format": self.config.format}
+        if self.config.format == "JPEG":
+            save_kwargs["quality"] = self.config.quality
+            # Convert RGBA to RGB for JPEG if needed
+            if image.mode == "RGBA":
+                rgb_image = PILImageLib.new("RGB", image.size, (255, 255, 255))
+                rgb_image.paste(image, mask=image.split()[3])
+                rgb_image.save(buffer, **save_kwargs)
+            else:
+                image.save(buffer, **save_kwargs)
+        elif self.config.format == "PNG":
+            save_kwargs["optimize"] = self.config.optimize
+            image.save(buffer, **save_kwargs)
+        return buffer.getvalue()
+    def read(self) -> bytes:
+        """Read image as bytes with current config (implements Image protocol)."""
+        if self._cached_bytes is None:
+            self._cached_bytes = self._convert_format(self.image)
+        return self._cached_bytes

oagi/pyautogui_action_handler.py CHANGED Viewed

@@ -10,10 +10,28 @@ import re
 import time
 import pyautogui
+from pydantic import BaseModel, Field
 from .types import Action, ActionType
+class PyautoguiConfig(BaseModel):
+    """Configuration for PyautoguiActionHandler."""
+    drag_duration: float = Field(
+        default=0.5, description="Duration for drag operations in seconds"
+    )
+    scroll_amount: int = Field(
+        default=30, description="Amount to scroll (positive for up, negative for down)"
+    )
+    wait_duration: float = Field(
+        default=1.0, description="Duration for wait actions in seconds"
+    )
+    action_pause: float = Field(
+        default=0.1, description="Pause between PyAutoGUI actions in seconds"
+    )
 class PyautoguiActionHandler:
     """
     Handles actions to be executed using PyAutoGUI.
@@ -29,11 +47,13 @@ class PyautoguiActionHandler:
         actions (list[Action]): List of actions to be processed and executed.
     """
-    def __init__(self):
+    def __init__(self, config: PyautoguiConfig | None = None):
+        # Use default config if none provided
+        self.config = config or PyautoguiConfig()
         # Get screen dimensions for coordinate denormalization
         self.screen_width, self.screen_height = pyautogui.size()
         # Set default delay between actions
-        pyautogui.PAUSE = 0.1
+        pyautogui.PAUSE = self.config.action_pause
     def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
         """Convert coordinates from 0-1000 range to actual screen coordinates."""
@@ -82,59 +102,70 @@ class PyautoguiActionHandler:
         keys = [key.strip() for key in args_str.split("+")]
         return keys
+    def _execute_single_action(self, action: Action) -> None:
+        """Execute a single action once."""
+        arg = action.argument.strip("()")  # Remove outer parentheses if present
+        match action.type:
+            case ActionType.CLICK:
+                x, y = self._parse_coords(arg)
+                pyautogui.click(x, y)
+            case ActionType.LEFT_DOUBLE:
+                x, y = self._parse_coords(arg)
+                pyautogui.doubleClick(x, y)
+            case ActionType.RIGHT_SINGLE:
+                x, y = self._parse_coords(arg)
+                pyautogui.rightClick(x, y)
+            case ActionType.DRAG:
+                x1, y1, x2, y2 = self._parse_drag_coords(arg)
+                pyautogui.moveTo(x1, y1)
+                pyautogui.dragTo(
+                    x2, y2, duration=self.config.drag_duration, button="left"
+                )
+            case ActionType.HOTKEY:
+                keys = self._parse_hotkey(arg)
+                pyautogui.hotkey(*keys)
+            case ActionType.TYPE:
+                # Remove quotes if present
+                text = arg.strip("\"'")
+                pyautogui.typewrite(text)
+            case ActionType.SCROLL:
+                x, y, direction = self._parse_scroll(arg)
+                pyautogui.moveTo(x, y)
+                scroll_amount = (
+                    self.config.scroll_amount
+                    if direction == "up"
+                    else -self.config.scroll_amount
+                )
+                pyautogui.scroll(scroll_amount)
+            case ActionType.FINISH:
+                # Task completion - no action needed
+                pass
+            case ActionType.WAIT:
+                # Wait for a short period
+                time.sleep(self.config.wait_duration)
+            case ActionType.CALL_USER:
+                # Call user - implementation depends on requirements
+                print("User intervention requested")
+            case _:
+                print(f"Unknown action type: {action.type}")
     def _execute_action(self, action: Action) -> None:
-        """Execute a single action."""
+        """Execute an action, potentially multiple times."""
         count = action.count or 1
-        arg = action.argument.strip("()")  # Remove outer parentheses if present
         for _ in range(count):
-            match action.type:
-                case ActionType.CLICK:
-                    x, y = self._parse_coords(arg)
-                    pyautogui.click(x, y)
-                case ActionType.LEFT_DOUBLE:
-                    x, y = self._parse_coords(arg)
-                    pyautogui.doubleClick(x, y)
-                case ActionType.RIGHT_SINGLE:
-                    x, y = self._parse_coords(arg)
-                    pyautogui.rightClick(x, y)
-                case ActionType.DRAG:
-                    x1, y1, x2, y2 = self._parse_drag_coords(arg)
-                    pyautogui.moveTo(x1, y1)
-                    pyautogui.dragTo(x2, y2, duration=0.5, button="left")
-                case ActionType.HOTKEY:
-                    keys = self._parse_hotkey(arg)
-                    pyautogui.hotkey(*keys)
-                case ActionType.TYPE:
-                    # Remove quotes if present
-                    text = arg.strip("\"'")
-                    pyautogui.typewrite(text)
-                case ActionType.SCROLL:
-                    x, y, direction = self._parse_scroll(arg)
-                    pyautogui.moveTo(x, y)
-                    scroll_amount = 5 if direction == "up" else -5
-                    pyautogui.scroll(scroll_amount)
-                case ActionType.FINISH:
-                    # Task completion - no action needed
-                    pass
-                case ActionType.WAIT:
-                    # Wait for a short period
-                    time.sleep(1)
-                case ActionType.CALL_USER:
-                    # Call user - implementation depends on requirements
-                    print("User intervention requested")
-                case _:
-                    print(f"Unknown action type: {action.type}")
+            self._execute_single_action(action)
     def __call__(self, actions: list[Action]) -> None:
         """Execute the provided list of actions."""

oagi/screenshot_maker.py CHANGED Viewed

@@ -6,68 +6,36 @@
 #  Licensed under the MIT License.
 # -----------------------------------------------------------------------------
-import io
 from typing import Optional
-import pyautogui
+from .pil_image import PILImage
 from .types import Image
-class FileImage:
-    def __init__(self, path: str):
-        self.path = path
-        with open(path, "rb") as f:
-            self.data = f.read()
-    def read(self) -> bytes:
-        return self.data
-class MockImage:
-    def read(self) -> bytes:
-        return b"mock screenshot data"
-class ScreenshotImage:
-    """Image class that wraps a pyautogui screenshot."""
-    def __init__(self, screenshot):
-        """Initialize with a PIL Image from pyautogui."""
-        self.screenshot = screenshot
-        self._cached_bytes: Optional[bytes] = None
-    def read(self) -> bytes:
-        """Convert the screenshot to bytes (PNG format)."""
-        if self._cached_bytes is None:
-            # Convert PIL Image to bytes
-            buffer = io.BytesIO()
-            self.screenshot.save(buffer, format="PNG")
-            self._cached_bytes = buffer.getvalue()
-        return self._cached_bytes
+from .types.models.image_config import ImageConfig
 class ScreenshotMaker:
     """Takes screenshots using pyautogui."""
-    def __init__(self):
-        self._last_screenshot: Optional[ScreenshotImage] = None
+    def __init__(self, config: ImageConfig | None = None):
+        self.config = config or ImageConfig()
+        self._last_image: Optional[PILImage] = None
     def __call__(self) -> Image:
-        """Take a screenshot and return it as an Image."""
-        # Take a screenshot using pyautogui
-        screenshot = pyautogui.screenshot()
+        """Take and process a screenshot."""
+        # Create PILImage from screenshot
+        pil_image = PILImage.from_screenshot()
-        # Wrap it in our ScreenshotImage class
-        screenshot_image = ScreenshotImage(screenshot)
+        # Apply transformation if config is set
+        if self.config:
+            pil_image = pil_image.transform(self.config)
-        # Store as the last screenshot
-        self._last_screenshot = screenshot_image
+        # Store as the last image
+        self._last_image = pil_image
-        return screenshot_image
+        return pil_image
     def last_image(self) -> Image:
         """Return the last screenshot taken, or take a new one if none exists."""
-        if self._last_screenshot is None:
+        if self._last_image is None:
             return self()
-        return self._last_screenshot
+        return self._last_image

oagi/short_task.py CHANGED Viewed

@@ -16,6 +16,14 @@ logger = get_logger("short_task")
 class ShortTask(Task):
     """Task implementation with automatic mode for short-duration tasks."""
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model: str = "vision-model-v1",
+    ):
+        super().__init__(api_key=api_key, base_url=base_url, model=model)
     def auto_mode(
         self,
         task_desc: str,

oagi/single_step.py CHANGED Viewed

@@ -8,6 +8,7 @@
 from pathlib import Path
+from .pil_image import PILImage
 from .task import Task
 from .types import Image, Step
@@ -59,12 +60,12 @@ def single_step(
         ...     screenshot=image
         ... )
     """
-    # Convert file paths to bytes
+    # Convert file paths to bytes using PILImage
     if isinstance(screenshot, (str, Path)):
         path = Path(screenshot) if isinstance(screenshot, str) else screenshot
         if path.exists():
-            with open(path, "rb") as f:
-                screenshot_bytes = f.read()
+            pil_image = PILImage.from_file(str(path))
+            screenshot_bytes = pil_image.read()
         else:
             raise FileNotFoundError(f"Screenshot file not found: {path}")
     elif isinstance(screenshot, bytes):

oagi/task.py CHANGED Viewed

@@ -16,13 +16,18 @@ logger = get_logger("task")
 class Task:
     """Base class for task automation with the OAGI API."""
-    def __init__(self, api_key: str | None = None, base_url: str | None = None):
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model: str = "vision-model-v1",
+    ):
         self.client = SyncClient(base_url=base_url, api_key=api_key)
         self.api_key = self.client.api_key
         self.base_url = self.client.base_url
         self.task_id: str | None = None
         self.task_description: str | None = None
-        self.model = "vision-model-v1"  # default model
+        self.model = model
     def init_task(self, task_desc: str, max_steps: int = 5):
         """Initialize a new task with the given description."""

oagi/types/__init__.py CHANGED Viewed

@@ -7,8 +7,20 @@
 # -----------------------------------------------------------------------------
 from .action_handler import ActionHandler
+from .async_action_handler import AsyncActionHandler
+from .async_image_provider import AsyncImageProvider
 from .image import Image
 from .image_provider import ImageProvider
-from .models import Action, ActionType, Step
+from .models import Action, ActionType, ImageConfig, Step
-__all__ = ["Action", "ActionType", "Image", "Step", "ActionHandler", "ImageProvider"]
+__all__ = [
+    "Action",
+    "ActionType",
+    "Image",
+    "ImageConfig",
+    "Step",
+    "ActionHandler",
+    "AsyncActionHandler",
+    "ImageProvider",
+    "AsyncImageProvider",
+]

oagi/types/async_action_handler.py ADDED Viewed

@@ -0,0 +1,30 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol
+from .models import Action
+class AsyncActionHandler(Protocol):
+    async def __call__(self, actions: list[Action]) -> None:
+        """
+        Asynchronously executes a list of actions.
+        This method takes a list of `Action` objects and executes them asynchronously.
+        It is used to perform operations represented by the `Action` instances. This
+        method does not return any value and modifies the system based on the input actions.
+        Parameters:
+            actions (list[Action]): A list of `Action` objects to be executed. Each
+            `Action` must encapsulate the logic that is intended to be applied
+            during the call.
+        Raises:
+            RuntimeError: If an error occurs during the execution of the actions.
+        """

oagi/types/async_image_provider.py ADDED Viewed

@@ -0,0 +1,37 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Protocol
+from .image import Image
+class AsyncImageProvider(Protocol):
+    async def __call__(self) -> Image:
+        """
+        Asynchronously provides an image.
+        This method is responsible for asynchronously capturing, generating, or retrieving
+        an image that can be used for task execution or analysis. The method should return
+        an object that implements the Image protocol.
+        Returns:
+            Image: An object implementing the Image protocol that represents
+                  the captured or generated image.
+        Raises:
+            RuntimeError: If an error occurs during image capture or generation.
+        """
+    async def last_image(self) -> Image:
+        """
+        Asynchronously returns the last captured image.
+        Returns:
+            Image: The last captured image.
+        """

oagi/types/models/__init__.py CHANGED Viewed

@@ -7,6 +7,7 @@
 # -----------------------------------------------------------------------------
 from .action import Action, ActionType
+from .image_config import ImageConfig
 from .step import Step
-__all__ = ["Action", "ActionType", "Step"]
+__all__ = ["Action", "ActionType", "ImageConfig", "Step"]

oagi/types/models/image_config.py ADDED Viewed

@@ -0,0 +1,47 @@
+# -----------------------------------------------------------------------------
+#  Copyright (c) OpenAGI Foundation
+#  All rights reserved.
+#
+#  This file is part of the official API project.
+#  Licensed under the MIT License.
+# -----------------------------------------------------------------------------
+from typing import Literal
+from pydantic import BaseModel, Field, field_validator
+class ImageConfig(BaseModel):
+    """Configuration for image capture and processing."""
+    format: Literal["PNG", "JPEG"] = Field(
+        default="JPEG", description="Image format for encoding"
+    )
+    quality: int = Field(
+        default=85,
+        ge=1,
+        le=100,
+        description="JPEG quality (1-100, only applies to JPEG format)",
+    )
+    width: int | None = Field(
+        default=1260, description="Target width in pixels (will resize to exact size)"
+    )
+    height: int | None = Field(
+        default=700, description="Target height in pixels (will resize to exact size)"
+    )
+    optimize: bool = Field(
+        default=False,
+        description="Enable PNG optimization (only applies to PNG format)",
+    )
+    resample: Literal["NEAREST", "BILINEAR", "BICUBIC", "LANCZOS"] = Field(
+        default="LANCZOS", description="Resampling filter for resizing"
+    )
+    @field_validator("quality")
+    @classmethod
+    def validate_quality(cls, v: int, info) -> int:
+        """Validate quality parameter based on format."""
+        values = info.data
+        if values.get("format") == "PNG" and v != 85:
+            return 85
+        return v

oagi 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

oagi-0.2.1-py3-none-any.whl → oagi-0.4.0-py3-none-any.whl