oagi-core 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. oagi/__init__.py +148 -0
  2. oagi/agent/__init__.py +33 -0
  3. oagi/agent/default.py +124 -0
  4. oagi/agent/factories.py +74 -0
  5. oagi/agent/observer/__init__.py +38 -0
  6. oagi/agent/observer/agent_observer.py +99 -0
  7. oagi/agent/observer/events.py +28 -0
  8. oagi/agent/observer/exporters.py +445 -0
  9. oagi/agent/observer/protocol.py +12 -0
  10. oagi/agent/protocol.py +55 -0
  11. oagi/agent/registry.py +155 -0
  12. oagi/agent/tasker/__init__.py +33 -0
  13. oagi/agent/tasker/memory.py +160 -0
  14. oagi/agent/tasker/models.py +77 -0
  15. oagi/agent/tasker/planner.py +408 -0
  16. oagi/agent/tasker/taskee_agent.py +512 -0
  17. oagi/agent/tasker/tasker_agent.py +324 -0
  18. oagi/cli/__init__.py +11 -0
  19. oagi/cli/agent.py +281 -0
  20. oagi/cli/display.py +56 -0
  21. oagi/cli/main.py +77 -0
  22. oagi/cli/server.py +94 -0
  23. oagi/cli/tracking.py +55 -0
  24. oagi/cli/utils.py +89 -0
  25. oagi/client/__init__.py +12 -0
  26. oagi/client/async_.py +290 -0
  27. oagi/client/base.py +457 -0
  28. oagi/client/sync.py +293 -0
  29. oagi/exceptions.py +118 -0
  30. oagi/handler/__init__.py +24 -0
  31. oagi/handler/_macos.py +55 -0
  32. oagi/handler/async_pyautogui_action_handler.py +44 -0
  33. oagi/handler/async_screenshot_maker.py +47 -0
  34. oagi/handler/pil_image.py +102 -0
  35. oagi/handler/pyautogui_action_handler.py +291 -0
  36. oagi/handler/screenshot_maker.py +41 -0
  37. oagi/logging.py +55 -0
  38. oagi/server/__init__.py +13 -0
  39. oagi/server/agent_wrappers.py +98 -0
  40. oagi/server/config.py +46 -0
  41. oagi/server/main.py +157 -0
  42. oagi/server/models.py +98 -0
  43. oagi/server/session_store.py +116 -0
  44. oagi/server/socketio_server.py +405 -0
  45. oagi/task/__init__.py +21 -0
  46. oagi/task/async_.py +101 -0
  47. oagi/task/async_short.py +76 -0
  48. oagi/task/base.py +157 -0
  49. oagi/task/short.py +76 -0
  50. oagi/task/sync.py +99 -0
  51. oagi/types/__init__.py +50 -0
  52. oagi/types/action_handler.py +30 -0
  53. oagi/types/async_action_handler.py +30 -0
  54. oagi/types/async_image_provider.py +38 -0
  55. oagi/types/image.py +17 -0
  56. oagi/types/image_provider.py +35 -0
  57. oagi/types/models/__init__.py +32 -0
  58. oagi/types/models/action.py +33 -0
  59. oagi/types/models/client.py +68 -0
  60. oagi/types/models/image_config.py +47 -0
  61. oagi/types/models/step.py +17 -0
  62. oagi/types/step_observer.py +93 -0
  63. oagi/types/url.py +3 -0
  64. oagi_core-0.10.1.dist-info/METADATA +245 -0
  65. oagi_core-0.10.1.dist-info/RECORD +68 -0
  66. oagi_core-0.10.1.dist-info/WHEEL +4 -0
  67. oagi_core-0.10.1.dist-info/entry_points.txt +2 -0
  68. oagi_core-0.10.1.dist-info/licenses/LICENSE +21 -0
oagi/client/sync.py ADDED
@@ -0,0 +1,293 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from functools import wraps
10
+
11
+ import httpx
12
+ from httpx import Response
13
+
14
+ from ..logging import get_logger
15
+ from ..types import Image
16
+ from ..types.models import GenerateResponse, LLMResponse, UploadFileResponse
17
+ from .base import BaseClient
18
+
19
+ logger = get_logger("sync_client")
20
+
21
+
22
def _log_trace_id(response: Response):
    """Emit the request and trace identifiers found in ``response`` headers.

    Both values are written at error level; a header that is absent is
    logged as an empty string.

    Args:
        response: HTTP response whose ``x-request-id`` and ``x-trace-id``
            headers are read.
    """
    headers = response.headers
    request_id = headers.get("x-request-id", "")
    trace_id = headers.get("x-trace-id", "")
    logger.error(f"Request Id: {request_id}")
    logger.error(f"Trace Id: {trace_id}")
25
+
26
+
27
def log_trace_on_failure(func):
    """Wrap ``func`` so that a failing call logs its trace identifiers.

    When the wrapped callable raises and the exception exposes a non-None
    ``response`` attribute, that response is handed to ``_log_trace_id``.
    The exception is always re-raised unchanged; successful calls return
    the wrapped callable's result untouched.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except Exception as exc:
            # Only exceptions that carry a response can be traced.
            failed_response = getattr(exc, "response", None)
            if failed_response is not None:
                _log_trace_id(failed_response)
            raise
        return outcome

    return wrapper
41
+
42
+
43
class SyncClient(BaseClient[httpx.Client]):
    """Synchronous HTTP client for the OAGI API.

    Holds two ``httpx.Client`` instances: ``client`` (bound to ``base_url``)
    for API calls and ``upload_client`` (60 second timeout) for presigned-URL
    uploads. Use the instance as a context manager, or call :meth:`close`,
    to release both.
    """

    def __init__(self, base_url: str | None = None, api_key: str | None = None):
        """Create the API and upload clients.

        Args:
            base_url: Forwarded to ``BaseClient.__init__``.
            api_key: Forwarded to ``BaseClient.__init__``.
        """
        super().__init__(base_url, api_key)
        self.client = httpx.Client(base_url=self.base_url)
        self.upload_client = httpx.Client(timeout=60)  # client for uploading image
        logger.info(f"SyncClient initialized with base_url: {self.base_url}")

    def __enter__(self):
        """Return this client for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close both httpx clients when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying httpx clients."""
        # Close the upload client even if closing the API client raises,
        # so one failure cannot leak the other connection pool.
        try:
            self.client.close()
        finally:
            self.upload_client.close()

    @log_trace_on_failure
    def create_message(
        self,
        model: str,
        screenshot: bytes | None = None,
        screenshot_url: str | None = None,
        task_description: str | None = None,
        task_id: str | None = None,
        instruction: str | None = None,
        messages_history: list | None = None,
        temperature: float | None = None,
        api_version: str | None = None,
    ) -> LLMResponse | None:
        """
        Call the /v2/message endpoint to analyze task and screenshot

        Args:
            model: The model to use for task analysis
            screenshot: Screenshot image bytes (mutually exclusive with screenshot_url)
            screenshot_url: Direct URL to screenshot (mutually exclusive with screenshot)
            task_description: Description of the task (required for new sessions)
            task_id: Task ID for continuing existing task
            instruction: Additional instruction when continuing a session
            messages_history: OpenAI-compatible chat message history
            temperature: Sampling temperature (0.0-2.0) for LLM inference
            api_version: API version header

        Returns:
            LLMResponse: The response from the API

        Raises:
            ValueError: If both or neither screenshot and screenshot_url are provided
            httpx.HTTPStatusError: For HTTP error responses
        """
        # Validate that exactly one is provided
        if (screenshot is None) == (screenshot_url is None):
            raise ValueError(
                "Exactly one of 'screenshot' or 'screenshot_url' must be provided"
            )

        self._log_request_info(model, task_description, task_id)

        # Upload screenshot to S3 if bytes provided, otherwise use URL directly
        upload_file_response = None
        if screenshot is not None:
            upload_file_response = self.put_s3_presigned_url(screenshot, api_version)

        # Prepare message payload
        headers, payload = self._prepare_message_payload(
            model=model,
            upload_file_response=upload_file_response,
            task_description=task_description,
            task_id=task_id,
            instruction=instruction,
            messages_history=messages_history,
            temperature=temperature,
            api_version=api_version,
            screenshot_url=screenshot_url,
        )

        # Make request
        try:
            response = self.client.post(
                "/v2/message", json=payload, headers=headers, timeout=self.timeout
            )
            return self._process_response(response)
        except (httpx.TimeoutException, httpx.NetworkError) as e:
            # NOTE(review): the handler lives on BaseClient (not visible here);
            # presumably it raises. If it returns, this method returns None.
            self._handle_upload_http_errors(e)

    def health_check(self) -> dict:
        """
        Call the /health endpoint for health check

        Returns:
            dict: Health check response

        Raises:
            httpx.HTTPStatusError: If the endpoint answers with an error
                status (logged as a warning, then re-raised)
        """
        logger.debug("Making health check request")
        try:
            response = self.client.get("/health")
            response.raise_for_status()
            result = response.json()
            logger.debug("Health check successful")
            return result
        except httpx.HTTPStatusError as e:
            logger.warning(f"Health check failed: {e}")
            raise

    def get_s3_presigned_url(
        self,
        api_version: str | None = None,
    ) -> UploadFileResponse:
        """
        Call the /v1/file/upload endpoint to get a S3 presigned URL

        Args:
            api_version: API version header

        Returns:
            UploadFileResponse: The response from /v1/file/upload with uuid and presigned S3 URL
        """
        logger.debug("Making API request to /v1/file/upload")

        try:
            headers = self._build_headers(api_version)
            response = self.client.get(
                "/v1/file/upload", headers=headers, timeout=self.timeout
            )
            return self._process_upload_response(response)
        except (httpx.TimeoutException, httpx.NetworkError, httpx.HTTPStatusError) as e:
            # Only HTTPStatusError carries a response; others pass None.
            self._handle_upload_http_errors(e, getattr(e, "response", None))

    def upload_to_s3(
        self,
        url: str,
        content: bytes | Image,
    ) -> None:
        """
        Upload image bytes to S3 using presigned URL

        Args:
            url: S3 presigned URL
            content: Image bytes or Image object to upload

        Raises:
            APIError: If upload fails
        """
        logger.debug("Uploading image to S3")

        # Convert Image to bytes if needed
        if isinstance(content, Image):
            content = content.read()

        # Stays None when the PUT itself fails, so the error handler can
        # tell a transport failure from an error status.
        response = None
        try:
            response = self.upload_client.put(url=url, content=content)
            response.raise_for_status()
        except Exception as e:
            self._handle_s3_upload_error(e, response)

    def put_s3_presigned_url(
        self,
        screenshot: bytes | Image,
        api_version: str | None = None,
    ) -> UploadFileResponse:
        """
        Get S3 presigned URL and upload image (convenience method)

        Args:
            screenshot: Screenshot image bytes or Image object
            api_version: API version header

        Returns:
            UploadFileResponse: The response from /v1/file/upload with uuid and presigned S3 URL
        """
        upload_file_response = self.get_s3_presigned_url(api_version)
        self.upload_to_s3(upload_file_response.url, screenshot)
        return upload_file_response

    @log_trace_on_failure
    def call_worker(
        self,
        worker_id: str,
        overall_todo: str,
        task_description: str,
        todos: list[dict],
        history: list[dict] | None = None,
        current_todo_index: int | None = None,
        task_execution_summary: str | None = None,
        current_screenshot: str | None = None,
        current_subtask_instruction: str | None = None,
        window_steps: list[dict] | None = None,
        window_screenshots: list[str] | None = None,
        result_screenshot: str | None = None,
        prior_notes: str | None = None,
        latest_todo_summary: str | None = None,
        api_version: str | None = None,
    ) -> GenerateResponse:
        """Call the /v1/generate endpoint for OAGI worker processing.

        Args:
            worker_id: One of "oagi_first", "oagi_follow", "oagi_task_summary"
            overall_todo: Current todo description
            task_description: Overall task description
            todos: List of todo dicts with index, description, status, execution_summary
            history: List of history dicts with todo_index, todo_description, action_count, summary, completed
            current_todo_index: Index of current todo being executed
            task_execution_summary: Summary of overall task execution
            current_screenshot: Uploaded file UUID for screenshot (oagi_first)
            current_subtask_instruction: Subtask instruction (oagi_follow)
            window_steps: Action steps list (oagi_follow)
            window_screenshots: Uploaded file UUIDs list (oagi_follow)
            result_screenshot: Uploaded file UUID for result screenshot (oagi_follow)
            prior_notes: Execution notes (oagi_follow)
            latest_todo_summary: Latest summary (oagi_task_summary)
            api_version: API version header

        Returns:
            GenerateResponse with LLM output and usage stats

        Raises:
            ValueError: If worker_id is invalid
            APIError: If API returns error
        """
        # Prepare request (validation, payload, headers)
        payload, headers = self._prepare_worker_request(
            worker_id=worker_id,
            overall_todo=overall_todo,
            task_description=task_description,
            todos=todos,
            history=history,
            current_todo_index=current_todo_index,
            task_execution_summary=task_execution_summary,
            current_screenshot=current_screenshot,
            current_subtask_instruction=current_subtask_instruction,
            window_steps=window_steps,
            window_screenshots=window_screenshots,
            result_screenshot=result_screenshot,
            prior_notes=prior_notes,
            latest_todo_summary=latest_todo_summary,
            api_version=api_version,
        )

        # Make request
        try:
            response = self.client.post(
                "/v1/generate", json=payload, headers=headers, timeout=self.timeout
            )
            return self._process_generate_response(response)
        except (httpx.TimeoutException, httpx.NetworkError) as e:
            # NOTE(review): same BaseClient handler as create_message; confirm it raises.
            self._handle_upload_http_errors(e)
oagi/exceptions.py ADDED
@@ -0,0 +1,118 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import importlib.util
10
+
11
+ import httpx
12
+
13
+
14
class OAGIError(Exception):
    """Root of the exception hierarchy defined in this module."""
16
+
17
+
18
class APIError(OAGIError):
    """Error describing a failed API call.

    The constructor arguments are kept verbatim on the instance as
    ``message``, ``code``, ``status_code`` and ``response``.
    """

    def __init__(
        self,
        message: str,
        code: str | None = None,
        status_code: int | None = None,
        response: httpx.Response | None = None,
    ):
        """Record the details of the failure.

        Args:
            message: Human-readable error message
            code: API error code for programmatic handling
            status_code: HTTP status code
            response: Original HTTP response object
        """
        super().__init__(message)
        self.message, self.code = message, code
        self.status_code, self.response = status_code, response

    def __str__(self) -> str:
        """Render the message, prefixed with the error code when one is set."""
        if not self.code:
            return f"API Error: {self.message}"
        return f"API Error [{self.code}]: {self.message}"
44
+
45
+
46
class AuthenticationError(APIError):
    """APIError subtype named for authentication failures (raise sites are outside this module)."""
48
+
49
+
50
class RateLimitError(APIError):
    """APIError subtype named for rate-limit failures (raise sites are outside this module)."""
52
+
53
+
54
class ValidationError(APIError):
    """APIError subtype named for validation failures (raise sites are outside this module)."""
56
+
57
+
58
class NotFoundError(APIError):
    """APIError subtype named for not-found failures (raise sites are outside this module)."""
60
+
61
+
62
class ServerError(APIError):
    """APIError subtype named for server-side failures (raise sites are outside this module)."""
64
+
65
+
66
class NetworkError(OAGIError):
    """OAGIError subtype that can keep a reference to an underlying exception."""

    def __init__(self, message: str, original_error: Exception | None = None):
        """Store the error text and the optional originating exception.

        Args:
            message: Text passed to the base exception.
            original_error: Exception kept on ``self.original_error``;
                ``None`` when there is none.
        """
        self.original_error = original_error
        super().__init__(message)
70
+
71
+
72
class RequestTimeoutError(NetworkError):
    """NetworkError subtype named for request timeouts (raise sites are outside this module)."""
74
+
75
+
76
class ConfigurationError(OAGIError):
    """OAGIError subtype named for configuration problems (raise sites are outside this module)."""
78
+
79
+
80
def check_optional_dependency(
    name: str,
    feature: str,
    extra: str,
    raise_error: bool = True,
) -> bool:
    """Check if an optional dependency is available, raise helpful error if not.

    This function validates that an optional dependency is installed without
    returning the module, allowing the caller to use a regular import statement
    afterward. This preserves IDE features like type hints, autocomplete, and
    go-to-definition.

    Args:
        name: Module name to check (e.g., "pyautogui", "PIL"). Dotted names
            are accepted; a missing parent package counts as "not installed".
        feature: Feature name for error message (e.g., "PyautoguiActionHandler")
        extra: extras_require key (e.g., "desktop", "server")
        raise_error: Whether to raise an ImportError if the module is not installed

    Returns:
        True if the module can be located. False if it cannot and
        ``raise_error`` is False.

    Raises:
        ImportError: If the module is not installed, with installation instructions

    Example:
        >>> check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
        >>> import pyautogui  # Full IDE support: types, autocomplete, navigation
        >>> pyautogui.click(100, 100)
    """
    try:
        spec = importlib.util.find_spec(name)
    except ModuleNotFoundError:
        # find_spec imports the parent of a dotted name and raises when that
        # parent is missing. Treat it like any other absent dependency so the
        # caller gets the install hint (or False) instead of a bare error.
        spec = None
    if spec is not None:
        return True

    msg = (
        f"{feature} requires {extra} dependencies. "
        f"Install with: pip install oagi[{extra}]"
    )
    if raise_error:
        raise ImportError(msg)
    return False
oagi/handler/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ from oagi.handler.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
9
+ from oagi.handler.async_screenshot_maker import AsyncScreenshotMaker
10
+ from oagi.handler.pil_image import PILImage
11
+ from oagi.handler.pyautogui_action_handler import (
12
+ PyautoguiActionHandler,
13
+ PyautoguiConfig,
14
+ )
15
+ from oagi.handler.screenshot_maker import ScreenshotMaker
16
+
17
+ __all__ = [
18
+ "PILImage",
19
+ "PyautoguiActionHandler",
20
+ "PyautoguiConfig",
21
+ "AsyncPyautoguiActionHandler",
22
+ "ScreenshotMaker",
23
+ "AsyncScreenshotMaker",
24
+ ]
oagi/handler/_macos.py ADDED
@@ -0,0 +1,55 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import pyautogui
10
+
11
+ from ..exceptions import check_optional_dependency
12
+
13
+ check_optional_dependency("Quartz", "macOS multiple clicks", "desktop")
14
+ import Quartz # noqa: E402
15
+
16
+
17
def macos_click(x: int, y: int, clicks: int = 1) -> None:
    """
    Post a left-button click sequence on macOS with an increasing click state.

    PyAutoGUI sends multi-clicks as separate single clicks (clickState=1),
    which macOS treats as distinct events rather than a double/triple click.
    Here each down/up pair is tagged with its position in the sequence.

    Check https://github.com/asweigart/pyautogui/issues/672

    Args:
        x: X coordinate
        y: Y coordinate
        clicks: Number of clicks (1=single, 2=double, 3=triple)
    """
    # Let pyautogui place the cursor first so both libraries agree on position.
    pyautogui.moveTo(x, y)

    target = Quartz.CGPoint(x=x, y=y)
    event_types = (Quartz.kCGEventLeftMouseDown, Quartz.kCGEventLeftMouseUp)

    for click_state in range(1, clicks + 1):
        # Build the down/up pair for this click.
        events = [
            Quartz.CGEventCreateMouseEvent(
                None, event_type, target, Quartz.kCGMouseButtonLeft
            )
            for event_type in event_types
        ]

        # Tag both events with the click ordinal (1 for first, 2 for second, ...).
        for event in events:
            Quartz.CGEventSetIntegerValueField(
                event, Quartz.kCGMouseEventClickState, click_state
            )

        # Deliver down, then up.
        for event in events:
            Quartz.CGEventPost(Quartz.kCGHIDEventTap, event)
oagi/handler/async_pyautogui_action_handler.py ADDED
@@ -0,0 +1,44 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import asyncio
10
+
11
+ from ..types import Action
12
+ from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
13
+
14
+
15
class AsyncPyautoguiActionHandler:
    """
    Async wrapper for PyautoguiActionHandler that runs actions in a thread pool.

    This allows PyAutoGUI operations to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while GUI actions are performed.
    """

    def __init__(self, config: PyautoguiConfig | None = None):
        """Initialize with optional configuration.

        Args:
            config: PyautoguiConfig instance for customizing behavior
        """
        # NOTE(review): when config is None the wrapped handler receives None
        # while self.config gets a fresh PyautoguiConfig(); confirm the two
        # are meant to be separate default instances.
        self.sync_handler = PyautoguiActionHandler(config=config)
        self.config = config or PyautoguiConfig()

    async def __call__(self, actions: list[Action]) -> None:
        """
        Execute actions asynchronously using a thread pool executor.

        This prevents PyAutoGUI operations from blocking the async event loop,
        allowing other coroutines to run while GUI actions are being performed.

        Args:
            actions: List of actions to execute
        """
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the documented way to obtain it and never creates a new loop.
        loop = asyncio.get_running_loop()
        # Run the synchronous handler in the loop's default executor to avoid blocking
        await loop.run_in_executor(None, self.sync_handler, actions)
oagi/handler/async_screenshot_maker.py ADDED
@@ -0,0 +1,47 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import asyncio
10
+
11
+ from ..types import Image, ImageConfig
12
+ from .screenshot_maker import ScreenshotMaker
13
+
14
+
15
class AsyncScreenshotMaker:
    """
    Async wrapper for ScreenshotMaker that captures screenshots in a thread pool.

    This allows screenshot capture to be non-blocking in async contexts,
    enabling concurrent execution of other async tasks while screenshots are taken.
    """

    def __init__(self, config: ImageConfig | None = None):
        """Initialize with optional image configuration.

        Args:
            config: ImageConfig instance for customizing screenshot format and quality
        """
        self.sync_screenshot_maker = ScreenshotMaker(config=config)
        self.config = config

    async def __call__(self) -> Image:
        """
        Capture a screenshot asynchronously using a thread pool executor.

        This prevents screenshot capture from blocking the async event loop,
        allowing other coroutines to run while the screenshot is being taken.

        Returns:
            Image: The captured screenshot as a PILImage
        """
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the documented way to obtain it and never creates a new loop.
        loop = asyncio.get_running_loop()
        # Run the synchronous screenshot capture in the default executor to avoid blocking
        return await loop.run_in_executor(None, self.sync_screenshot_maker)

    async def last_image(self) -> Image:
        """Return the wrapped maker's ``last_image()`` result.

        The call runs directly on the event loop thread, not in the executor.
        """
        return self.sync_screenshot_maker.last_image()
oagi/handler/pil_image.py ADDED
@@ -0,0 +1,102 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import io
10
+
11
+ from ..exceptions import check_optional_dependency
12
+ from ..types.models.image_config import ImageConfig
13
+
14
+ check_optional_dependency("PIL", "PILImage", "desktop")
15
+ from PIL import Image as PILImageLib # noqa: E402
16
+
17
+
18
class PILImage:
    """PIL image wrapper with transformation capabilities.

    Wraps a PIL image together with an ``ImageConfig``. Encoding to bytes is
    deferred to :meth:`read`, which caches its result on the instance.
    """

    # Modes Pillow's JPEG writer accepts as-is; anything else is converted first.
    _JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, image: PILImageLib.Image, config: ImageConfig | None = None):
        """Initialize with a PIL image and optional config.

        Args:
            image: The PIL image to wrap.
            config: Encoding/resize settings; a default ``ImageConfig()`` is
                used when omitted.
        """
        self.image = image
        self.config = config or ImageConfig()
        # Filled on the first read() so the image is encoded at most once.
        self._cached_bytes: bytes | None = None

    @classmethod
    def from_file(cls, path: str, config: ImageConfig | None = None) -> "PILImage":
        """Create PILImage from file path."""
        image = PILImageLib.open(path)
        return cls(image, config)

    @classmethod
    def from_bytes(cls, data: bytes, config: ImageConfig | None = None) -> "PILImage":
        """Create PILImage from raw bytes."""
        image = PILImageLib.open(io.BytesIO(data))
        return cls(image, config)

    @classmethod
    def from_screenshot(cls, config: ImageConfig | None = None) -> "PILImage":
        """Create PILImage from screenshot.

        Raises:
            ImportError: If pyautogui is not installed.
        """
        # Lazy import to avoid DISPLAY issues in headless environments
        check_optional_dependency("pyautogui", "PILImage.from_screenshot()", "desktop")
        import pyautogui  # noqa: PLC0415

        screenshot = pyautogui.screenshot()
        return cls(screenshot, config)

    def transform(self, config: ImageConfig) -> "PILImage":
        """Apply transformations (resize) based on config and return new PILImage."""
        # Apply resize if needed
        transformed = self._resize(self.image, config)
        # Return new PILImage with the config (format conversion happens on read())
        return PILImage(transformed, config)

    def _resize(
        self, image: PILImageLib.Image, config: ImageConfig
    ) -> PILImageLib.Image:
        """Resize image based on config.

        Returns ``image`` unchanged when neither ``config.width`` nor
        ``config.height`` is set; a missing dimension keeps the original size
        on that axis (aspect ratio is not preserved).
        """
        if config.width or config.height:
            # Get target dimensions (use original if not specified)
            target_width = config.width or image.width
            target_height = config.height or image.height

            # Map resample string to PIL constant
            resample_map = {
                "NEAREST": PILImageLib.NEAREST,
                "BILINEAR": PILImageLib.BILINEAR,
                "BICUBIC": PILImageLib.BICUBIC,
                "LANCZOS": PILImageLib.LANCZOS,
            }
            resample = resample_map[config.resample]

            # Resize to exact dimensions
            return image.resize((target_width, target_height), resample)
        return image

    @classmethod
    def _prepare_for_jpeg(cls, image: PILImageLib.Image) -> PILImageLib.Image:
        """Return an image whose mode the JPEG writer can save."""
        mode = image.mode
        has_alpha = mode in ("RGBA", "LA") or (
            mode == "P" and "transparency" in image.info
        )
        if has_alpha:
            # JPEG has no alpha channel: flatten onto a white background.
            rgba = image if mode == "RGBA" else image.convert("RGBA")
            flattened = PILImageLib.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.split()[3])
            return flattened
        if mode not in cls._JPEG_WRITABLE_MODES:
            # e.g. palette ("P") or 32-bit integer images would fail on save.
            return image.convert("RGB")
        return image

    def _convert_format(self, image: PILImageLib.Image) -> bytes:
        """Encode ``image`` in the configured format and return the bytes.

        JPEG uses ``config.quality`` and PNG uses ``config.optimize``. Any
        other format is saved with no extra options rather than silently
        producing empty output.
        """
        buffer = io.BytesIO()
        save_kwargs = {"format": self.config.format}

        if self.config.format == "JPEG":
            save_kwargs["quality"] = self.config.quality
            image = self._prepare_for_jpeg(image)
        elif self.config.format == "PNG":
            save_kwargs["optimize"] = self.config.optimize

        image.save(buffer, **save_kwargs)
        return buffer.getvalue()

    def read(self) -> bytes:
        """Read image as bytes with current config (implements Image protocol)."""
        if self._cached_bytes is None:
            self._cached_bytes = self._convert_format(self.image)
        return self._cached_bytes