oagi 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of oagi might be problematic. Click here for more details.

oagi/__init__.py CHANGED
@@ -6,6 +6,12 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
+ from oagi.async_client import AsyncClient
10
+ from oagi.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
11
+ from oagi.async_screenshot_maker import AsyncScreenshotMaker
12
+ from oagi.async_short_task import AsyncShortTask
13
+ from oagi.async_single_step import async_single_step
14
+ from oagi.async_task import AsyncTask
9
15
  from oagi.exceptions import (
10
16
  APIError,
11
17
  AuthenticationError,
@@ -19,26 +25,42 @@ from oagi.exceptions import (
19
25
  ValidationError,
20
26
  )
21
27
  from oagi.pil_image import PILImage
22
- from oagi.pyautogui_action_handler import PyautoguiActionHandler
28
+ from oagi.pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
23
29
  from oagi.screenshot_maker import ScreenshotMaker
24
30
  from oagi.short_task import ShortTask
25
31
  from oagi.single_step import single_step
26
32
  from oagi.sync_client import ErrorDetail, ErrorResponse, LLMResponse, SyncClient
27
33
  from oagi.task import Task
28
- from oagi.types import ImageConfig
34
+ from oagi.types import (
35
+ AsyncActionHandler,
36
+ AsyncImageProvider,
37
+ ImageConfig,
38
+ )
29
39
 
30
40
  __all__ = [
31
- # Core classes
41
+ # Core sync classes
32
42
  "Task",
33
43
  "ShortTask",
34
44
  "SyncClient",
45
+ # Core async classes
46
+ "AsyncTask",
47
+ "AsyncShortTask",
48
+ "AsyncClient",
35
49
  # Functions
36
50
  "single_step",
51
+ "async_single_step",
37
52
  # Image classes
38
53
  "PILImage",
39
54
  # Handler classes
40
55
  "PyautoguiActionHandler",
56
+ "PyautoguiConfig",
41
57
  "ScreenshotMaker",
58
+ # Async handler classes
59
+ "AsyncPyautoguiActionHandler",
60
+ "AsyncScreenshotMaker",
61
+ # Async protocols
62
+ "AsyncActionHandler",
63
+ "AsyncImageProvider",
42
64
  # Configuration
43
65
  "ImageConfig",
44
66
  # Response models
oagi/async_client.py ADDED
@@ -0,0 +1,239 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import os
10
+ from functools import wraps
11
+
12
+ import httpx
13
+
14
+ from .exceptions import (
15
+ APIError,
16
+ AuthenticationError,
17
+ ConfigurationError,
18
+ NetworkError,
19
+ NotFoundError,
20
+ RateLimitError,
21
+ RequestTimeoutError,
22
+ ServerError,
23
+ ValidationError,
24
+ )
25
+ from .logging import get_logger
26
+ from .sync_client import ErrorResponse, LLMResponse
27
+
28
+ logger = get_logger("async_client")
29
+
30
+
31
def async_log_trace_on_failure(func):
    """Decorate an async callable so request/trace IDs are logged on failure.

    When the wrapped coroutine raises an exception that carries a non-None
    ``response`` attribute, the ``x-request-id`` and ``x-trace-id`` response
    headers are logged at error level. The original exception is always
    re-raised unchanged.

    Args:
        func: The coroutine function to wrap.

    Returns:
        The wrapping coroutine function (metadata preserved via ``wraps``).
    """

    @wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            response = getattr(e, "response", None)
            if response is not None:
                # Best-effort diagnostics: a response object without usable
                # headers must not replace the original exception with an
                # AttributeError raised from inside this handler.
                headers = getattr(response, "headers", None) or {}
                logger.error(f"Request Id: {headers.get('x-request-id', '')}")
                logger.error(f"Trace Id: {headers.get('x-trace-id', '')}")
            raise

    return wrapper
46
+
47
+
48
class AsyncClient:
    """Async HTTP client for the OAGI API.

    Wraps an ``httpx.AsyncClient`` bound to the configured base URL. The
    instance can be used as an async context manager; leaving the context
    closes the underlying HTTP client.
    """

    def __init__(self, base_url: str | None = None, api_key: str | None = None):
        """Create the client from explicit arguments or the environment.

        Args:
            base_url: API base URL; falls back to ``OAGI_BASE_URL``.
            api_key: API key; falls back to ``OAGI_API_KEY``.

        Raises:
            ConfigurationError: If the base URL or the API key is missing.
        """
        # Explicit arguments win; otherwise fall back to the environment.
        self.base_url = base_url or os.getenv("OAGI_BASE_URL")
        self.api_key = api_key or os.getenv("OAGI_API_KEY")

        # Validate required configuration
        if not self.base_url:
            raise ConfigurationError(
                "OAGI base URL must be provided either as 'base_url' parameter or "
                "OAGI_BASE_URL environment variable"
            )

        if not self.api_key:
            raise ConfigurationError(
                "OAGI API key must be provided either as 'api_key' parameter or "
                "OAGI_API_KEY environment variable"
            )

        self.base_url = self.base_url.rstrip("/")
        self.client = httpx.AsyncClient(base_url=self.base_url)
        # Per-request timeout (seconds) passed to every /v1/message call.
        self.timeout = 60

        logger.info(f"AsyncClient initialized with base_url: {self.base_url}")

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Single cleanup path shared with explicit close() calls.
        await self.close()

    async def close(self):
        """Close the underlying httpx async client"""
        await self.client.aclose()

    @async_log_trace_on_failure
    async def create_message(
        self,
        model: str,
        screenshot: str,  # base64 encoded
        task_description: str | None = None,
        task_id: str | None = None,
        instruction: str | None = None,
        max_actions: int | None = 5,
        api_version: str | None = None,
    ) -> LLMResponse:
        """
        Call the /v1/message endpoint to analyze task and screenshot

        Args:
            model: The model to use for task analysis
            screenshot: Base64-encoded screenshot image
            task_description: Description of the task (required for new sessions)
            task_id: Task ID for continuing existing task
            instruction: Additional instruction when continuing a session (only works with task_id)
            max_actions: Maximum number of actions to return (1-20)
            api_version: API version header

        Returns:
            LLMResponse: The response from the API

        Raises:
            RequestTimeoutError: If the HTTP request times out.
            NetworkError: If a network-level error occurs.
            APIError: For a non-JSON or malformed body, a non-200 status
                (mapped to a more specific subclass where one exists for the
                status code), or a 200 response that carries an error field.
        """
        headers = {}
        if api_version:
            headers["x-api-version"] = api_version
        if self.api_key:
            headers["x-api-key"] = self.api_key

        payload = {"model": model, "screenshot": screenshot}

        # Optional fields are only sent when the caller supplied them.
        if task_description is not None:
            payload["task_description"] = task_description
        if task_id is not None:
            payload["task_id"] = task_id
        if instruction is not None:
            payload["instruction"] = instruction
        if max_actions is not None:
            payload["max_actions"] = max_actions

        logger.info(f"Making async API request to /v1/message with model: {model}")
        logger.debug(
            f"Request includes task_description: {task_description is not None}, task_id: {task_id is not None}"
        )

        try:
            response = await self.client.post(
                "/v1/message", json=payload, headers=headers, timeout=self.timeout
            )
        except httpx.TimeoutException as e:
            logger.error(f"Request timed out after {self.timeout} seconds")
            # Chain explicitly so the httpx error stays the direct cause.
            raise RequestTimeoutError(
                f"Request timed out after {self.timeout} seconds", e
            ) from e
        except httpx.NetworkError as e:
            logger.error(f"Network error: {e}")
            raise NetworkError(f"Network error: {e}", e) from e

        try:
            response_data = response.json()
        except ValueError:
            # If response is not JSON, raise API error
            logger.error(f"Non-JSON API response: {response.status_code}")
            raise APIError(
                f"Invalid response format (status {response.status_code})",
                status_code=response.status_code,
                response=response,
            )

        if not isinstance(response_data, dict):
            # Valid JSON that is not an object cannot be unpacked into the
            # response models below; report it as an API-level error instead
            # of letting a bare TypeError escape.
            logger.error(
                f"Unexpected JSON payload type in API response: {response.status_code}"
            )
            exception_class = self._get_exception_class(response.status_code)
            raise exception_class(
                f"Invalid response format (status {response.status_code})",
                status_code=response.status_code,
                response=response,
            )

        # Check if it's an error response (non-200 status or has error field)
        if response.status_code != 200:
            error_resp = ErrorResponse(**response_data)
            # Map to specific exception types based on status code
            exception_class = self._get_exception_class(response.status_code)
            if error_resp.error:
                error_code = error_resp.error.code
                error_msg = error_resp.error.message
                logger.error(f"API Error [{error_code}]: {error_msg}")
                raise exception_class(
                    error_msg,
                    code=error_code,
                    status_code=response.status_code,
                    response=response,
                )

            # Error response without error details
            logger.error(
                f"API error response without details: {response.status_code}"
            )
            raise exception_class(
                f"API error (status {response.status_code})",
                status_code=response.status_code,
                response=response,
            )

        # Parse successful response
        result = LLMResponse(**response_data)

        # Check if the response contains an error (even with 200 status)
        if result.error:
            logger.error(
                f"API Error in response: [{result.error.code}]: {result.error.message}"
            )
            raise APIError(
                result.error.message,
                code=result.error.code,
                status_code=200,
                response=response,
            )

        logger.info(
            f"Async API request successful - task_id: {result.task_id}, step: {result.current_step}, complete: {result.is_complete}"
        )
        logger.debug(f"Response included {len(result.actions)} actions")
        return result

    def _get_exception_class(self, status_code: int) -> type[APIError]:
        """Get the appropriate exception class based on status code.

        Any status >= 500 maps to ``ServerError``; 401, 404, 422 and 429 map
        to their dedicated classes; everything else maps to ``APIError``.
        """
        if status_code >= 500:
            return ServerError

        status_map = {
            401: AuthenticationError,
            404: NotFoundError,
            422: ValidationError,
            429: RateLimitError,
        }
        return status_map.get(status_code, APIError)

    async def health_check(self) -> dict:
        """
        Call the /health endpoint for health check

        Returns:
            dict: Health check response

        Raises:
            httpx.HTTPStatusError: If the endpoint answers with an error
                status; the failure is logged as a warning and re-raised.
        """
        logger.debug("Making async health check request")
        try:
            response = await self.client.get("/health")
            response.raise_for_status()
            result = response.json()
            logger.debug("Async health check successful")
            return result
        except httpx.HTTPStatusError as e:
            logger.warning(f"Async health check failed: {e}")
            raise
@@ -0,0 +1,44 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import asyncio
10
+
11
+ from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
12
+ from .types import Action
13
+
14
+
15
class AsyncPyautoguiActionHandler:
    """
    Async wrapper for PyautoguiActionHandler that runs actions in a thread pool.

    The wrapped synchronous handler is invoked through the event loop's
    default executor, so awaiting this handler does not block the loop while
    the GUI actions run.
    """

    def __init__(self, config: PyautoguiConfig | None = None):
        """Initialize with optional configuration.

        Args:
            config: PyautoguiConfig instance for customizing behavior;
                a default PyautoguiConfig is created when omitted.
        """
        # Resolve the config once and hand the same instance to the wrapped
        # handler, so ``self.config`` always reflects what the handler uses.
        self.config = config or PyautoguiConfig()
        self.sync_handler = PyautoguiActionHandler(config=self.config)

    async def __call__(self, actions: list[Action]) -> None:
        """
        Execute actions asynchronously using a thread pool executor.

        Args:
            actions: List of actions to execute
        """
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the documented way to obtain it (get_event_loop() is discouraged
        # for this use).
        loop = asyncio.get_running_loop()
        # Run the synchronous handler in the default executor to avoid blocking
        await loop.run_in_executor(None, self.sync_handler, actions)
@@ -0,0 +1,47 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import asyncio
10
+
11
+ from .screenshot_maker import ScreenshotMaker
12
+ from .types import Image, ImageConfig
13
+
14
+
15
class AsyncScreenshotMaker:
    """
    Async wrapper for ScreenshotMaker that captures screenshots in a thread pool.

    The wrapped synchronous maker is invoked through the event loop's default
    executor, so awaiting a capture does not block the loop.
    """

    def __init__(self, config: ImageConfig | None = None):
        """Initialize with optional image configuration.

        Args:
            config: ImageConfig instance passed through to the wrapped
                ScreenshotMaker; stored unchanged on ``self.config``.
        """
        self.sync_screenshot_maker = ScreenshotMaker(config=config)
        self.config = config

    async def __call__(self) -> Image:
        """
        Capture a screenshot asynchronously using a thread pool executor.

        Returns:
            Image: Whatever the wrapped ScreenshotMaker returns when called.
        """
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the documented way to obtain it (get_event_loop() is discouraged
        # for this use).
        loop = asyncio.get_running_loop()
        # Run the synchronous screenshot capture in the default executor
        return await loop.run_in_executor(None, self.sync_screenshot_maker)

    async def last_image(self) -> Image:
        """Return the wrapped maker's ``last_image()`` result.

        This delegates directly, without going through the executor.
        """
        return self.sync_screenshot_maker.last_image()
@@ -0,0 +1,56 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from .async_task import AsyncTask
10
+ from .logging import get_logger
11
+ from .types import AsyncActionHandler, AsyncImageProvider
12
+
13
+ logger = get_logger("async_short_task")
14
+
15
+
16
class AsyncShortTask(AsyncTask):
    """Async task implementation with automatic mode for short-duration tasks."""

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        model: str = "vision-model-v1",
    ):
        super().__init__(api_key=api_key, base_url=base_url, model=model)

    async def auto_mode(
        self,
        task_desc: str,
        max_steps: int = 5,
        executor: AsyncActionHandler | None = None,
        image_provider: AsyncImageProvider | None = None,
    ) -> bool:
        """Run the task in automatic mode with the provided executor and image provider.

        Each iteration awaits ``image_provider()`` for a screenshot, sends it
        through ``step()``, awaits ``executor`` on the returned actions (when
        an executor is given), and stops as soon as a step reports ``stop``.

        Args:
            task_desc: Description of the task to run.
            max_steps: Maximum number of steps to attempt.
            executor: Optional async handler awaited with each step's actions.
            image_provider: Async callable producing the screenshot for each
                step. Required, despite the ``None`` default kept for
                signature compatibility.

        Returns:
            True if a step signalled completion within ``max_steps``,
            False otherwise.

        Raises:
            ValueError: If ``image_provider`` is not supplied.
        """
        if image_provider is None:
            # Fail before init_task() talks to the API; otherwise the loop
            # below would die with "'NoneType' object is not callable".
            raise ValueError("image_provider is required for auto_mode")

        logger.info(
            f"Starting async auto mode for task: '{task_desc}' (max_steps: {max_steps})"
        )
        await self.init_task(task_desc, max_steps=max_steps)

        for i in range(max_steps):
            logger.debug(f"Async auto mode step {i + 1}/{max_steps}")
            image = await image_provider()
            step = await self.step(image)
            # Actions run before the stop check, so the actions of the final
            # step are executed too.
            if executor:
                logger.debug(f"Executing {len(step.actions)} actions asynchronously")
                await executor(step.actions)
            if step.stop:
                logger.info(
                    f"Async auto mode completed successfully after {i + 1} steps"
                )
                return True

        logger.warning(
            f"Async auto mode reached max steps ({max_steps}) without completion"
        )
        return False
@@ -0,0 +1,83 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from pathlib import Path
10
+
11
+ from .async_task import AsyncTask
12
+ from .pil_image import PILImage
13
+ from .types import Image, Step
14
+
15
+
16
async def async_single_step(
    task_description: str,
    screenshot: str | bytes | Path | Image,
    instruction: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
) -> Step:
    """
    Perform a single-step inference asynchronously without maintaining task state.

    A temporary AsyncTask is created, initialized with the task description,
    asked for one step, and closed again before returning.

    Args:
        task_description: Description of the task to perform
        screenshot: Screenshot as Image, bytes, or file path (str or Path)
        instruction: Optional additional instruction for the task
        api_key: OAGI API key (uses environment variable if not provided)
        base_url: OAGI base URL (uses environment variable if not provided)

    Returns:
        Step: Object containing reasoning, actions, and completion status

    Example:
        >>> import asyncio
        >>> async def main():
        ...     # Using with bytes
        ...     with open("screenshot.png", "rb") as f:
        ...         screenshot_bytes = f.read()
        ...     step = await async_single_step(
        ...         "Click the submit button",
        ...         screenshot=screenshot_bytes
        ...     )
        ...     print(f"Actions: {step.actions}")
        ...
        ...     # Using with file path
        ...     step = await async_single_step(
        ...         "Find the search box",
        ...         screenshot="screenshot.png"
        ...     )
        ...
        ...     # Using with PILImage
        ...     image = PILImage.from_file("screenshot.png")
        ...     step = await async_single_step(
        ...         "Click next page",
        ...         screenshot=image
        ...     )
        >>> asyncio.run(main())
    """
    # Normalize path-like and raw-bytes inputs to a PILImage; anything else
    # is passed through to AsyncTask.step() unchanged.
    if isinstance(screenshot, (str, Path)):
        screenshot = PILImage.from_file(str(screenshot))
    elif isinstance(screenshot, bytes):
        screenshot = PILImage.from_bytes(screenshot)

    # AsyncTask is an async context manager whose exit closes its HTTP
    # client, so the temporary task is cleaned up even if a call fails.
    async with AsyncTask(api_key=api_key, base_url=base_url) as task:
        await task.init_task(task_description)
        return await task.step(screenshot, instruction=instruction)
oagi/async_task.py ADDED
@@ -0,0 +1,117 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from .async_client import AsyncClient
10
+ from .logging import get_logger
11
+ from .sync_client import encode_screenshot_from_bytes
12
+ from .types import Image, Step
13
+
14
+ logger = get_logger("async_task")
15
+
16
+
17
class AsyncTask:
    """Async base class for task automation with the OAGI API.

    Owns an AsyncClient and tracks the current task description and the task
    ID returned by the server. Usable as an async context manager; leaving
    the context closes the client.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        model: str = "vision-model-v1",
    ):
        self.model = model
        self.task_id: str | None = None
        self.task_description: str | None = None
        self.client = AsyncClient(base_url=base_url, api_key=api_key)
        # Mirror the values the client actually resolved.
        self.api_key = self.client.api_key
        self.base_url = self.client.base_url

    async def init_task(self, task_desc: str, max_steps: int = 5):
        """Initialize a new task with the given description.

        Sends an initial message with an empty screenshot and no task ID, and
        stores the task ID from the response. ``max_steps`` is only used in
        the log message here.
        """
        self.task_description = task_desc
        initial = await self.client.create_message(
            model=self.model,
            screenshot="",
            task_description=self.task_description,
            task_id=None,
        )
        # Adopt the server-issued ID for the task that was just created.
        self.task_id = initial.task_id
        logger.info(f"Async task initialized: '{task_desc}' (max_steps: {max_steps})")

    async def step(
        self, screenshot: Image | bytes, instruction: str | None = None
    ) -> Step:
        """Send screenshot to the server and get the next actions.

        Args:
            screenshot: Screenshot as Image object or raw bytes
            instruction: Optional additional instruction for this step (only works with existing task_id)

        Returns:
            Step: The actions and reasoning for this step

        Raises:
            ValueError: If no task description has been set yet.
        """
        if not self.task_description:
            raise ValueError("Task description must be set. Call init_task() first.")

        logger.debug(f"Executing async step for task: '{self.task_description}'")

        try:
            # Image objects expose their bytes via read(); raw bytes pass through.
            raw = screenshot.read() if isinstance(screenshot, Image) else screenshot
            encoded = encode_screenshot_from_bytes(raw)

            response = await self.client.create_message(
                model=self.model,
                screenshot=encoded,
                task_description=self.task_description,
                task_id=self.task_id,
                instruction=instruction,
            )

            # Keep the locally tracked task ID in sync with the server's.
            if self.task_id != response.task_id:
                if self.task_id is not None:
                    logger.debug(
                        f"Task ID changed: {self.task_id} -> {response.task_id}"
                    )
                else:
                    logger.debug(f"Task ID assigned: {response.task_id}")
                self.task_id = response.task_id

            step_result = Step(
                reason=response.reason,
                actions=response.actions,
                stop=response.is_complete,
            )

            if not response.is_complete:
                logger.debug(
                    f"Async step {response.current_step} completed with {len(response.actions)} actions"
                )
            else:
                logger.info(f"Async task completed after {response.current_step} steps")

            return step_result

        except Exception as e:
            # Log for context, then let the caller see the original error.
            logger.error(f"Error during async step execution: {e}")
            raise

    async def close(self):
        """Close the underlying HTTP client to free resources."""
        await self.client.close()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()
@@ -10,10 +10,28 @@ import re
10
10
  import time
11
11
 
12
12
  import pyautogui
13
+ from pydantic import BaseModel, Field
13
14
 
14
15
  from .types import Action, ActionType
15
16
 
16
17
 
18
class PyautoguiConfig(BaseModel):
    """Configuration for PyautoguiActionHandler.

    Duration-like fields are validated as non-negative so an invalid value is
    rejected when the config is built rather than in the middle of an action
    sequence.
    """

    # Passed as ``duration`` to the drag operation.
    drag_duration: float = Field(
        default=0.5, ge=0, description="Duration for drag operations in seconds"
    )
    # NOTE(review): the handler applies this value as-is for "up" scrolls and
    # negated otherwise, so the sign is normally chosen by the action's
    # direction rather than by this setting.
    scroll_amount: int = Field(
        default=30, description="Amount to scroll (positive for up, negative for down)"
    )
    # Sleep length used for wait actions.
    wait_duration: float = Field(
        default=1.0, ge=0, description="Duration for wait actions in seconds"
    )
    # Assigned to ``pyautogui.PAUSE`` by the handler.
    action_pause: float = Field(
        default=0.1, ge=0, description="Pause between PyAutoGUI actions in seconds"
    )
+
34
+
17
35
  class PyautoguiActionHandler:
18
36
  """
19
37
  Handles actions to be executed using PyAutoGUI.
@@ -29,11 +47,13 @@ class PyautoguiActionHandler:
29
47
  actions (list[Action]): List of actions to be processed and executed.
30
48
  """
31
49
 
32
- def __init__(self):
50
+ def __init__(self, config: PyautoguiConfig | None = None):
51
+ # Use default config if none provided
52
+ self.config = config or PyautoguiConfig()
33
53
  # Get screen dimensions for coordinate denormalization
34
54
  self.screen_width, self.screen_height = pyautogui.size()
35
55
  # Set default delay between actions
36
- pyautogui.PAUSE = 0.1
56
+ pyautogui.PAUSE = self.config.action_pause
37
57
 
38
58
  def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
39
59
  """Convert coordinates from 0-1000 range to actual screen coordinates."""
@@ -82,59 +102,74 @@ class PyautoguiActionHandler:
82
102
  keys = [key.strip() for key in args_str.split("+")]
83
103
  return keys
84
104
 
105
+ def _execute_single_action(self, action: Action) -> None:
106
+ """Execute a single action once."""
107
+ arg = action.argument.strip("()") # Remove outer parentheses if present
108
+
109
+ match action.type:
110
+ case ActionType.CLICK:
111
+ x, y = self._parse_coords(arg)
112
+ pyautogui.click(x, y)
113
+
114
+ case ActionType.LEFT_DOUBLE:
115
+ x, y = self._parse_coords(arg)
116
+ pyautogui.doubleClick(x, y)
117
+
118
+ case ActionType.LEFT_TRIPLE:
119
+ x, y = self._parse_coords(arg)
120
+ pyautogui.tripleClick(x, y)
121
+
122
+ case ActionType.RIGHT_SINGLE:
123
+ x, y = self._parse_coords(arg)
124
+ pyautogui.rightClick(x, y)
125
+
126
+ case ActionType.DRAG:
127
+ x1, y1, x2, y2 = self._parse_drag_coords(arg)
128
+ pyautogui.moveTo(x1, y1)
129
+ pyautogui.dragTo(
130
+ x2, y2, duration=self.config.drag_duration, button="left"
131
+ )
132
+
133
+ case ActionType.HOTKEY:
134
+ keys = self._parse_hotkey(arg)
135
+ pyautogui.hotkey(*keys)
136
+
137
+ case ActionType.TYPE:
138
+ # Remove quotes if present
139
+ text = arg.strip("\"'")
140
+ pyautogui.typewrite(text)
141
+
142
+ case ActionType.SCROLL:
143
+ x, y, direction = self._parse_scroll(arg)
144
+ pyautogui.moveTo(x, y)
145
+ scroll_amount = (
146
+ self.config.scroll_amount
147
+ if direction == "up"
148
+ else -self.config.scroll_amount
149
+ )
150
+ pyautogui.scroll(scroll_amount)
151
+
152
+ case ActionType.FINISH:
153
+ # Task completion - no action needed
154
+ pass
155
+
156
+ case ActionType.WAIT:
157
+ # Wait for a short period
158
+ time.sleep(self.config.wait_duration)
159
+
160
+ case ActionType.CALL_USER:
161
+ # Call user - implementation depends on requirements
162
+ print("User intervention requested")
163
+
164
+ case _:
165
+ print(f"Unknown action type: {action.type}")
166
+
85
167
  def _execute_action(self, action: Action) -> None:
86
- """Execute a single action."""
168
+ """Execute an action, potentially multiple times."""
87
169
  count = action.count or 1
88
- arg = action.argument.strip("()") # Remove outer parentheses if present
89
170
 
90
171
  for _ in range(count):
91
- match action.type:
92
- case ActionType.CLICK:
93
- x, y = self._parse_coords(arg)
94
- pyautogui.click(x, y)
95
-
96
- case ActionType.LEFT_DOUBLE:
97
- x, y = self._parse_coords(arg)
98
- pyautogui.doubleClick(x, y)
99
-
100
- case ActionType.RIGHT_SINGLE:
101
- x, y = self._parse_coords(arg)
102
- pyautogui.rightClick(x, y)
103
-
104
- case ActionType.DRAG:
105
- x1, y1, x2, y2 = self._parse_drag_coords(arg)
106
- pyautogui.moveTo(x1, y1)
107
- pyautogui.dragTo(x2, y2, duration=0.5, button="left")
108
-
109
- case ActionType.HOTKEY:
110
- keys = self._parse_hotkey(arg)
111
- pyautogui.hotkey(*keys)
112
-
113
- case ActionType.TYPE:
114
- # Remove quotes if present
115
- text = arg.strip("\"'")
116
- pyautogui.typewrite(text)
117
-
118
- case ActionType.SCROLL:
119
- x, y, direction = self._parse_scroll(arg)
120
- pyautogui.moveTo(x, y)
121
- scroll_amount = 5 if direction == "up" else -5
122
- pyautogui.scroll(scroll_amount)
123
-
124
- case ActionType.FINISH:
125
- # Task completion - no action needed
126
- pass
127
-
128
- case ActionType.WAIT:
129
- # Wait for a short period
130
- time.sleep(1)
131
-
132
- case ActionType.CALL_USER:
133
- # Call user - implementation depends on requirements
134
- print("User intervention requested")
135
-
136
- case _:
137
- print(f"Unknown action type: {action.type}")
172
+ self._execute_single_action(action)
138
173
 
139
174
  def __call__(self, actions: list[Action]) -> None:
140
175
  """Execute the provided list of actions."""
oagi/short_task.py CHANGED
@@ -41,12 +41,12 @@ class ShortTask(Task):
41
41
  logger.debug(f"Auto mode step {i + 1}/{max_steps}")
42
42
  image = image_provider()
43
43
  step = self.step(image)
44
- if step.stop:
45
- logger.info(f"Auto mode completed successfully after {i + 1} steps")
46
- return True
47
44
  if executor:
48
45
  logger.debug(f"Executing {len(step.actions)} actions")
49
46
  executor(step.actions)
47
+ if step.stop:
48
+ logger.info(f"Auto mode completed successfully after {i + 1} steps")
49
+ return True
50
50
 
51
51
  logger.warning(f"Auto mode reached max steps ({max_steps}) without completion")
52
52
  return False
oagi/types/__init__.py CHANGED
@@ -7,6 +7,8 @@
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
9
  from .action_handler import ActionHandler
10
+ from .async_action_handler import AsyncActionHandler
11
+ from .async_image_provider import AsyncImageProvider
10
12
  from .image import Image
11
13
  from .image_provider import ImageProvider
12
14
  from .models import Action, ActionType, ImageConfig, Step
@@ -18,5 +20,7 @@ __all__ = [
18
20
  "ImageConfig",
19
21
  "Step",
20
22
  "ActionHandler",
23
+ "AsyncActionHandler",
21
24
  "ImageProvider",
25
+ "AsyncImageProvider",
22
26
  ]
@@ -0,0 +1,30 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from typing import Protocol
10
+
11
+ from .models import Action
12
+
13
+
14
class AsyncActionHandler(Protocol):
    """Structural type for awaitable callables that execute a list of actions."""

    async def __call__(self, actions: list[Action]) -> None:
        """
        Asynchronously execute the given actions.

        Implementations carry out the operations described by each `Action`
        and return nothing; their effect is whatever the actions do to the
        system.

        Parameters:
            actions (list[Action]): The `Action` objects to execute.

        Raises:
            RuntimeError: If an error occurs during the execution of the actions.
        """
        ...
@@ -0,0 +1,37 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from typing import Protocol
10
+
11
+ from .image import Image
12
+
13
+
14
class AsyncImageProvider(Protocol):
    """Structural type for awaitable image sources used during task execution."""

    async def __call__(self) -> Image:
        """
        Asynchronously produce an image.

        Implementations capture, generate, or retrieve an image for task
        execution or analysis and return it as an object implementing the
        Image protocol.

        Returns:
            Image: The captured or generated image.

        Raises:
            RuntimeError: If an error occurs during image capture or generation.
        """
        ...

    async def last_image(self) -> Image:
        """
        Asynchronously return the last captured image.

        Returns:
            Image: The last captured image.
        """
        ...
@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
14
14
  class ActionType(str, Enum):
15
15
  CLICK = "click"
16
16
  LEFT_DOUBLE = "left_double"
17
+ LEFT_TRIPLE = "left_triple"
17
18
  RIGHT_SINGLE = "right_single"
18
19
  DRAG = "drag"
19
20
  HOTKEY = "hotkey"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: oagi
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: Official API of OpenAGI Foundation
5
5
  Project-URL: Homepage, https://github.com/agiopen-org/oagi
6
6
  Author-email: OpenAGI Foundation <contact@agiopen.org>
@@ -82,6 +82,23 @@ completed = task.auto_mode(
82
82
  )
83
83
  ```
84
84
 
85
+ Configure PyAutoGUI behavior with custom settings:
86
+
87
+ ```python
88
+ from oagi import PyautoguiActionHandler, PyautoguiConfig
89
+
90
+ # Customize action behavior
91
+ config = PyautoguiConfig(
92
+ drag_duration=1.0, # Slower drags for precision (default: 0.5)
93
+ scroll_amount=50, # Larger scroll steps (default: 30)
94
+ wait_duration=2.0, # Longer waits (default: 1.0)
95
+ action_pause=0.2, # More pause between actions (default: 0.1)
96
+ )
97
+
98
+ executor = PyautoguiActionHandler(config=config)
99
+ task.auto_mode("Complete form", executor=executor, image_provider=ScreenshotMaker())
100
+ ```
101
+
85
102
  ### Image Processing
86
103
 
87
104
  Process and optimize images before sending to API:
@@ -103,6 +120,31 @@ compressed = image.transform(config)
103
120
  step = single_step("Click button", screenshot=compressed)
104
121
  ```
105
122
 
123
+ ### Async Support
124
+
125
+ Use the async client for non-blocking operations and better concurrency:
126
+
127
+ ```python
128
+ import asyncio
129
+ from oagi import async_single_step, AsyncShortTask
130
+
131
+ async def main():
132
+ # Single-step async analysis
133
+ step = await async_single_step(
134
+ "Find the search bar",
135
+ screenshot="screenshot.png"
136
+ )
137
+ print(f"Found {len(step.actions)} actions")
138
+
139
+ # Async task automation
140
+ task = AsyncShortTask()
141
+ async with task:
142
+ await task.init_task("Complete the form")
143
+ # ... continue with async operations
144
+
145
+ asyncio.run(main())
146
+ ```
147
+
106
148
  ## Examples
107
149
 
108
150
  See the [`examples/`](examples/) directory for more usage patterns:
@@ -0,0 +1,30 @@
1
+ oagi/__init__.py,sha256=m-Z121YCIwQOPXpTC8kd_UIJizcX8QuHyrSSguQ0KE0,2187
2
+ oagi/async_client.py,sha256=oDj4kIdtaV37uopoAeClCFQTxrYRwHV2HwMAcMdVYwE,8455
3
+ oagi/async_pyautogui_action_handler.py,sha256=F-lKyePCONWI03WnSxpX_QwxONbvnfdQu51wTod6mdw,1614
4
+ oagi/async_screenshot_maker.py,sha256=pI-dbLcYOzcO1ffgTmozAdbYJQNBPKA7hmqj1RxEmIY,1688
5
+ oagi/async_short_task.py,sha256=jvFTbmXTxFlkpAwmWeZlxbTSv_RB7V561hxw6gUcigw,1961
6
+ oagi/async_single_step.py,sha256=QawXO4GyfMz6O9jV8QBC1vKxFuS9vjKQxxJ1nwgHBzI,2838
7
+ oagi/async_task.py,sha256=bclqtgg7mI2WAp-62jOz044tVk4wruycpn9NYDncnA8,4145
8
+ oagi/exceptions.py,sha256=VMwVS8ouE9nHhBpN3AZMYt5_U2kGcihWaTnBhoQLquo,1662
9
+ oagi/logging.py,sha256=CWe89mA5MKTipIvfrqSYkv2CAFNBSwHMDQMDkG_g64g,1350
10
+ oagi/pil_image.py,sha256=Zp7YNwyE_AT25ZEFsWKbzMxbO8JOQsJ1Espph5ye8k8,3804
11
+ oagi/pyautogui_action_handler.py,sha256=ix_Zl9uHch3Oz1H6bNEb7-1ee3-qiW_MsT-4SbWBf7g,6610
12
+ oagi/screenshot_maker.py,sha256=sVuW7jn-K4FmLhmYI-akdNI-UVcTeBzh9P1_qJhoq1s,1282
13
+ oagi/short_task.py,sha256=9l1PDX70vDUEX2CIJ66yaAtb96P3mK_m95JffspnYFI,1779
14
+ oagi/single_step.py,sha256=djhGOHzA5Y3-9_ity9QiJr_ObZZ04blSmNZsLXXXfkg,2939
15
+ oagi/sync_client.py,sha256=E6EgFIe-H91rdsPhF1puwrBTpOnKaL6JA1WHR4R-CLY,9395
16
+ oagi/task.py,sha256=JfsugIhBrwDmi1xOEVQdqmXsGFK-H4p17-B4rM8kbWs,4001
17
+ oagi/types/__init__.py,sha256=YXxL-30f92qAf9U6LZuVCtKFG-Pi3xahKedaNxyrxFE,766
18
+ oagi/types/action_handler.py,sha256=NH8E-m5qpGqWcXzTSWfF7W0Xdp8SkzJsbhCmQ0B96cg,1075
19
+ oagi/types/async_action_handler.py,sha256=k1AaqSkFcXlxwW8sn-w0WFHGsIqHFLbcOPrkknmSVug,1116
20
+ oagi/types/async_image_provider.py,sha256=wnhRyPtTmuALt45Qore74-RCkP5yxU9sZGjvOzFqzOk,1170
21
+ oagi/types/image.py,sha256=KgPCCTJ6D5vHIaGZdbTE7eQEa1WlT6G9tf59ZuUCV2U,537
22
+ oagi/types/image_provider.py,sha256=oYFdOYznrK_VOR9egzOjw5wFM5w8EY2sY01pH0ANAgU,1112
23
+ oagi/types/models/__init__.py,sha256=bVzzGxb6lVxAQyJpy0Z1QknSe-xC3g4OIDr7t-p_3Ys,467
24
+ oagi/types/models/action.py,sha256=hh6mRRSSWgrW4jpZo71zGMCOcZpV5_COu4148uG6G48,967
25
+ oagi/types/models/image_config.py,sha256=tl6abVg_-IAPLwpaWprgknXu7wRWriMg-AEVyUX73v0,1567
26
+ oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
27
+ oagi-0.4.1.dist-info/METADATA,sha256=AGVosMgpoFwLAB9BWAknn7a3aXwVAiUAWvTQJm0w3RY,4620
28
+ oagi-0.4.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
29
+ oagi-0.4.1.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
30
+ oagi-0.4.1.dist-info/RECORD,,
@@ -1,22 +0,0 @@
1
- oagi/__init__.py,sha256=1pewp0wOcGI8urjOOCskwiJC9VghhGCRpsslf-VUiLI,1493
2
- oagi/exceptions.py,sha256=VMwVS8ouE9nHhBpN3AZMYt5_U2kGcihWaTnBhoQLquo,1662
3
- oagi/logging.py,sha256=CWe89mA5MKTipIvfrqSYkv2CAFNBSwHMDQMDkG_g64g,1350
4
- oagi/pil_image.py,sha256=Zp7YNwyE_AT25ZEFsWKbzMxbO8JOQsJ1Espph5ye8k8,3804
5
- oagi/pyautogui_action_handler.py,sha256=LBWmtqkXzZSJo07s3uOw-NWUE9rZZtbNAx0YI83pCbk,5482
6
- oagi/screenshot_maker.py,sha256=sVuW7jn-K4FmLhmYI-akdNI-UVcTeBzh9P1_qJhoq1s,1282
7
- oagi/short_task.py,sha256=fJcirqD7X3_GyINTGdOoe6wi-VFHfP-C8m-zxCvgY5M,1779
8
- oagi/single_step.py,sha256=djhGOHzA5Y3-9_ity9QiJr_ObZZ04blSmNZsLXXXfkg,2939
9
- oagi/sync_client.py,sha256=E6EgFIe-H91rdsPhF1puwrBTpOnKaL6JA1WHR4R-CLY,9395
10
- oagi/task.py,sha256=JfsugIhBrwDmi1xOEVQdqmXsGFK-H4p17-B4rM8kbWs,4001
11
- oagi/types/__init__.py,sha256=dj_UWdpRzhuVyi-pegQAv2V0f1DxidFxjWUhpcWzYKE,608
12
- oagi/types/action_handler.py,sha256=NH8E-m5qpGqWcXzTSWfF7W0Xdp8SkzJsbhCmQ0B96cg,1075
13
- oagi/types/image.py,sha256=KgPCCTJ6D5vHIaGZdbTE7eQEa1WlT6G9tf59ZuUCV2U,537
14
- oagi/types/image_provider.py,sha256=oYFdOYznrK_VOR9egzOjw5wFM5w8EY2sY01pH0ANAgU,1112
15
- oagi/types/models/__init__.py,sha256=bVzzGxb6lVxAQyJpy0Z1QknSe-xC3g4OIDr7t-p_3Ys,467
16
- oagi/types/models/action.py,sha256=8Xd3IcH32ENq7uXczo-mbQ736yUOGxO_TaZTfHVRY7w,935
17
- oagi/types/models/image_config.py,sha256=tl6abVg_-IAPLwpaWprgknXu7wRWriMg-AEVyUX73v0,1567
18
- oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
19
- oagi-0.3.0.dist-info/METADATA,sha256=BtkLuhcIXhL43C23nZa6uZNcUuhlhXjJ67OaaXxeEmI,3461
20
- oagi-0.3.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
21
- oagi-0.3.0.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
22
- oagi-0.3.0.dist-info/RECORD,,
File without changes