oagi 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of oagi might be problematic. Click here for more details.

Files changed (51) hide show
  1. oagi-0.3.0/PKG-INFO +119 -0
  2. oagi-0.3.0/README.md +85 -0
  3. {oagi-0.2.0 → oagi-0.3.0}/examples/execute_task_manual.py +3 -0
  4. oagi-0.3.0/examples/screenshot_with_config.py +94 -0
  5. {oagi-0.2.0 → oagi-0.3.0}/pyproject.toml +1 -1
  6. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/__init__.py +6 -0
  7. oagi-0.3.0/src/oagi/pil_image.py +98 -0
  8. oagi-0.3.0/src/oagi/screenshot_maker.py +41 -0
  9. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/short_task.py +8 -0
  10. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/single_step.py +4 -3
  11. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/sync_client.py +24 -0
  12. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/task.py +7 -2
  13. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/__init__.py +10 -2
  14. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/models/__init__.py +2 -1
  15. oagi-0.3.0/src/oagi/types/models/image_config.py +47 -0
  16. {oagi-0.2.0 → oagi-0.3.0}/tests/conftest.py +15 -0
  17. oagi-0.3.0/tests/test_logging.py +335 -0
  18. oagi-0.3.0/tests/test_pil_image.py +236 -0
  19. oagi-0.3.0/tests/test_screenshot_maker.py +175 -0
  20. {oagi-0.2.0 → oagi-0.3.0}/tests/test_short_task.py +14 -0
  21. {oagi-0.2.0 → oagi-0.3.0}/tests/test_single_step.py +22 -15
  22. oagi-0.3.0/tests/test_sync_client.py +410 -0
  23. {oagi-0.2.0 → oagi-0.3.0}/tests/test_task.py +8 -0
  24. {oagi-0.2.0 → oagi-0.3.0}/uv.lock +1 -1
  25. oagi-0.2.0/PKG-INFO +0 -55
  26. oagi-0.2.0/README.md +0 -21
  27. oagi-0.2.0/src/oagi/screenshot_maker.py +0 -73
  28. oagi-0.2.0/tests/test_logging.py +0 -318
  29. oagi-0.2.0/tests/test_screenshot_maker.py +0 -149
  30. oagi-0.2.0/tests/test_sync_client.py +0 -326
  31. {oagi-0.2.0 → oagi-0.3.0}/.github/workflows/ci.yml +0 -0
  32. {oagi-0.2.0 → oagi-0.3.0}/.github/workflows/release.yml +0 -0
  33. {oagi-0.2.0 → oagi-0.3.0}/.gitignore +0 -0
  34. {oagi-0.2.0 → oagi-0.3.0}/.python-version +0 -0
  35. {oagi-0.2.0 → oagi-0.3.0}/CONTRIBUTING.md +0 -0
  36. {oagi-0.2.0 → oagi-0.3.0}/LICENSE +0 -0
  37. {oagi-0.2.0 → oagi-0.3.0}/Makefile +0 -0
  38. {oagi-0.2.0 → oagi-0.3.0}/examples/execute_task_auto.py +0 -0
  39. {oagi-0.2.0 → oagi-0.3.0}/examples/google_weather.py +0 -0
  40. {oagi-0.2.0 → oagi-0.3.0}/examples/hotel_booking.py +0 -0
  41. {oagi-0.2.0 → oagi-0.3.0}/examples/single_step.py +0 -0
  42. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/exceptions.py +0 -0
  43. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/logging.py +0 -0
  44. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/pyautogui_action_handler.py +0 -0
  45. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/action_handler.py +0 -0
  46. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/image.py +0 -0
  47. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/image_provider.py +0 -0
  48. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/models/action.py +0 -0
  49. {oagi-0.2.0 → oagi-0.3.0}/src/oagi/types/models/step.py +0 -0
  50. {oagi-0.2.0 → oagi-0.3.0}/tests/__init__.py +0 -0
  51. {oagi-0.2.0 → oagi-0.3.0}/tests/test_pyautogui_action_handler.py +0 -0
oagi-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.3
2
+ Name: oagi
3
+ Version: 0.3.0
4
+ Summary: Official API of OpenAGI Foundation
5
+ Project-URL: Homepage, https://github.com/agiopen-org/oagi
6
+ Author-email: OpenAGI Foundation <contact@agiopen.org>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2025 OpenAGI Foundation
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ Requires-Python: >=3.10
29
+ Requires-Dist: httpx>=0.28.0
30
+ Requires-Dist: pillow>=11.3.0
31
+ Requires-Dist: pyautogui>=0.9.54
32
+ Requires-Dist: pydantic>=2.0.0
33
+ Description-Content-Type: text/markdown
34
+
35
+ # OAGI Python SDK
36
+
37
+ Python SDK for the OAGI API - vision-based task automation.
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ pip install oagi # requires Python >= 3.10
43
+ ```
44
+
45
+ ## Quick Start
46
+
47
+ Set your API credentials:
48
+ ```bash
49
+ export OAGI_API_KEY="your-api-key"
50
+ export OAGI_BASE_URL="https://api.oagi.com" # or your server URL
51
+ ```
52
+
53
+ ### Single-Step Analysis
54
+
55
+ Analyze a screenshot and get recommended actions:
56
+
57
+ ```python
58
+ from oagi import single_step
59
+
60
+ step = single_step(
61
+ task_description="Click the submit button",
62
+ screenshot="screenshot.png" # or bytes, or Image object
63
+ )
64
+
65
+ print(f"Actions: {step.actions}")
66
+ print(f"Complete: {step.is_complete}")
67
+ ```
68
+
69
+ ### Automated Task Execution
70
+
71
+ Run tasks automatically with screenshot capture and action execution:
72
+
73
+ ```python
74
+ from oagi import ShortTask, ScreenshotMaker, PyautoguiActionHandler
75
+
76
+ task = ShortTask()
77
+ completed = task.auto_mode(
78
+ "Search weather on Google",
79
+ max_steps=10,
80
+ executor=PyautoguiActionHandler(), # Executes mouse/keyboard actions
81
+ image_provider=ScreenshotMaker(), # Captures screenshots
82
+ )
83
+ ```
84
+
85
+ ### Image Processing
86
+
87
+ Process and optimize images before sending to API:
88
+
89
+ ```python
90
+ from oagi import ImageConfig, PILImage, single_step
91
+
92
+ # Load and compress an image
93
+ image = PILImage.from_file("large_screenshot.png")
94
+ config = ImageConfig(
95
+ format="JPEG",
96
+ quality=85,
97
+ width=1260,
98
+ height=700
99
+ )
100
+ compressed = image.transform(config)
101
+
102
+ # Use with single_step
103
+ step = single_step("Click button", screenshot=compressed)
104
+ ```
105
+
106
+ ## Examples
107
+
108
+ See the [`examples/`](examples/) directory for more usage patterns:
109
+ - `google_weather.py` - Basic task execution with `ShortTask`
110
+ - `single_step.py` - Basic single-step inference
111
+ - `screenshot_with_config.py` - Image compression and optimization
112
+ - `execute_task_auto.py` - Automated task execution
113
+
114
+ ## Documentation
115
+
116
+
117
+ ## License
118
+
119
+ MIT
oagi-0.3.0/README.md ADDED
@@ -0,0 +1,85 @@
1
+ # OAGI Python SDK
2
+
3
+ Python SDK for the OAGI API - vision-based task automation.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install oagi # requires Python >= 3.10
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ Set your API credentials:
14
+ ```bash
15
+ export OAGI_API_KEY="your-api-key"
16
+ export OAGI_BASE_URL="https://api.oagi.com" # or your server URL
17
+ ```
18
+
19
+ ### Single-Step Analysis
20
+
21
+ Analyze a screenshot and get recommended actions:
22
+
23
+ ```python
24
+ from oagi import single_step
25
+
26
+ step = single_step(
27
+ task_description="Click the submit button",
28
+ screenshot="screenshot.png" # or bytes, or Image object
29
+ )
30
+
31
+ print(f"Actions: {step.actions}")
32
+ print(f"Complete: {step.is_complete}")
33
+ ```
34
+
35
+ ### Automated Task Execution
36
+
37
+ Run tasks automatically with screenshot capture and action execution:
38
+
39
+ ```python
40
+ from oagi import ShortTask, ScreenshotMaker, PyautoguiActionHandler
41
+
42
+ task = ShortTask()
43
+ completed = task.auto_mode(
44
+ "Search weather on Google",
45
+ max_steps=10,
46
+ executor=PyautoguiActionHandler(), # Executes mouse/keyboard actions
47
+ image_provider=ScreenshotMaker(), # Captures screenshots
48
+ )
49
+ ```
50
+
51
+ ### Image Processing
52
+
53
+ Process and optimize images before sending to API:
54
+
55
+ ```python
56
+ from oagi import ImageConfig, PILImage, single_step
57
+
58
+ # Load and compress an image
59
+ image = PILImage.from_file("large_screenshot.png")
60
+ config = ImageConfig(
61
+ format="JPEG",
62
+ quality=85,
63
+ width=1260,
64
+ height=700
65
+ )
66
+ compressed = image.transform(config)
67
+
68
+ # Use with single_step
69
+ step = single_step("Click button", screenshot=compressed)
70
+ ```
71
+
72
+ ## Examples
73
+
74
+ See the [`examples/`](examples/) directory for more usage patterns:
75
+ - `google_weather.py` - Basic task execution with `ShortTask`
76
+ - `single_step.py` - Basic single-step inference
77
+ - `screenshot_with_config.py` - Image compression and optimization
78
+ - `execute_task_auto.py` - Automated task execution
79
+
80
+ ## Documentation
81
+
82
+
83
+ ## License
84
+
85
+ MIT
@@ -17,6 +17,9 @@ def execute_task_manual(task_desc, max_steps=5):
17
17
  executor = (
18
18
  PyautoguiActionHandler()
19
19
  ) # executor = lambda actions: print(actions) for debugging
20
+
21
+ # By default, the screenshot is resized to 1260x700 and encoded as JPEG with quality 85.
22
+ # For a lossless, full-resolution capture, use ScreenshotMaker(config=ImageConfig(format="PNG", width=None, height=None))
20
23
  image_provider = ScreenshotMaker()
21
24
 
22
25
  for i in range(max_steps):
@@ -0,0 +1,94 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from oagi import ImageConfig, PILImage, ScreenshotMaker, single_step
10
+
11
+
12
def example_full_png_screenshot():
    """Example 1: capture the screen as a PNG at its native resolution.

    Width and height are explicitly disabled so no resizing happens, and PNG
    optimization is switched on. Prints the resulting dimensions and encoded
    size, then returns the captured image.
    """
    print("Example 1: Full PNG screenshot without resizing")

    # width/height set to None -> keep the original screen dimensions.
    capture = ScreenshotMaker(
        config=ImageConfig(format="PNG", width=None, height=None, optimize=True)
    )
    shot = capture()

    print(f"PNG screenshot dimensions: {shot.image.size}")
    print(f"PNG screenshot size: {len(shot.read())} bytes")
    return shot
29
+
30
+
31
def example_load_and_compress(file_name):
    """Example 2: load an image file and re-encode it as a smaller JPEG.

    The image at ``file_name`` is resized to 1260x700 and encoded as JPEG with
    quality 70. Prints the dimensions before and after plus the encoded size,
    then returns the transformed image.
    """
    print("\nExample 2: Load image from file and convert to JPEG")

    source = PILImage.from_file(file_name)
    print(f"Original image dimensions: {source.image.size}")

    # Lower quality plus a fixed target size keeps the payload small.
    settings = {"format": "JPEG", "quality": 70, "width": 1260, "height": 700}
    shrunk = source.transform(ImageConfig(**settings))
    encoded = shrunk.read()

    print(f"Compressed image dimensions: {shrunk.image.size}")
    print(f"Compressed JPEG size: {len(encoded)} bytes")
    return shrunk
54
+
55
+
56
def example_with_single_step(file_name):
    """Example 3: compress an image and send it through ``single_step``.

    Loads ``file_name``, resizes it to 1260x700 (JPEG, quality 85), and passes
    the result to ``single_step`` with a placeholder API key and a local base
    URL. Prints whether the task is complete and returns the step.
    """
    print("\nExample 3: Use with single_step")

    loaded = PILImage.from_file(file_name)
    print(f"Original image dimensions: {loaded.image.size}")

    compressed = loaded.transform(
        ImageConfig(format="JPEG", quality=85, width=1260, height=700)
    )
    print(f"Compressed image dimensions: {compressed.image.size}")

    # The key and URL are placeholders; replace them before running.
    request = {
        "task_description": "Click the submit button",
        "screenshot": compressed,
        "api_key": "your-api-key-here",
        "base_url": "http://127.0.0.1:8000",
    }
    step = single_step(**request)

    print(f"Task complete: {step.is_complete}")
    return step
78
+
79
+
80
def example_default_config():
    """Example 4: capture a screenshot with the default ``ImageConfig``.

    A ``ScreenshotMaker`` built without arguments falls back to its default
    configuration. Prints the dimensions and encoded size, then returns the
    captured image.
    """
    print("\nExample 4: Default configuration")

    shot = ScreenshotMaker()()  # no config -> default ImageConfig

    print(f"Default screenshot dimensions: {shot.image.size}")
    print(f"Default JPEG size: {len(shot.read())} bytes")
    return shot
90
+
91
+
92
if __name__ == "__main__":
    # Only the examples that need no input file are run by default.
    for run_example in (example_full_png_screenshot, example_default_config):
        run_example()
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "Official API of OpenAGI Foundation"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -18,12 +18,14 @@ from oagi.exceptions import (
18
18
  ServerError,
19
19
  ValidationError,
20
20
  )
21
+ from oagi.pil_image import PILImage
21
22
  from oagi.pyautogui_action_handler import PyautoguiActionHandler
22
23
  from oagi.screenshot_maker import ScreenshotMaker
23
24
  from oagi.short_task import ShortTask
24
25
  from oagi.single_step import single_step
25
26
  from oagi.sync_client import ErrorDetail, ErrorResponse, LLMResponse, SyncClient
26
27
  from oagi.task import Task
28
+ from oagi.types import ImageConfig
27
29
 
28
30
  __all__ = [
29
31
  # Core classes
@@ -32,9 +34,13 @@ __all__ = [
32
34
  "SyncClient",
33
35
  # Functions
34
36
  "single_step",
37
+ # Image classes
38
+ "PILImage",
35
39
  # Handler classes
36
40
  "PyautoguiActionHandler",
37
41
  "ScreenshotMaker",
42
+ # Configuration
43
+ "ImageConfig",
38
44
  # Response models
39
45
  "LLMResponse",
40
46
  "ErrorResponse",
@@ -0,0 +1,98 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import io
10
+ from typing import Optional
11
+
12
+ import pyautogui
13
+ from PIL import Image as PILImageLib
14
+
15
+ from .types.models.image_config import ImageConfig
16
+
17
+
18
class PILImage:
    """PIL image wrapper that resizes on ``transform`` and encodes on ``read``.

    The wrapped ``PIL.Image.Image`` is exposed as ``image`` and the active
    ``ImageConfig`` as ``config``. Encoding to bytes is deferred until
    ``read()`` is called and the result is cached per instance.
    """

    # Modes Pillow's JPEG writer accepts directly; anything else must be
    # converted before saving or Pillow raises "cannot write mode X as JPEG".
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    # Modes that carry an alpha channel and need flattening for JPEG.
    _ALPHA_MODES = ("RGBA", "LA", "PA")

    def __init__(self, image: PILImageLib.Image, config: ImageConfig | None = None):
        """Wrap ``image``; a missing ``config`` falls back to ``ImageConfig()``."""
        self.image = image
        self.config = config or ImageConfig()
        self._cached_bytes: Optional[bytes] = None

    @classmethod
    def from_file(cls, path: str, config: ImageConfig | None = None) -> "PILImage":
        """Create a PILImage from a file path.

        The pixel data is loaded eagerly: ``Image.open`` is lazy and keeps the
        file handle open, whereas ``load()`` reads the data immediately and
        lets Pillow release the handle (for single-frame images).
        """
        image = PILImageLib.open(path)
        image.load()
        return cls(image, config)

    @classmethod
    def from_bytes(cls, data: bytes, config: ImageConfig | None = None) -> "PILImage":
        """Create a PILImage from encoded image bytes."""
        image = PILImageLib.open(io.BytesIO(data))
        return cls(image, config)

    @classmethod
    def from_screenshot(cls, config: ImageConfig | None = None) -> "PILImage":
        """Create a PILImage from a ``pyautogui`` screenshot."""
        screenshot = pyautogui.screenshot()
        return cls(screenshot, config)

    def transform(self, config: ImageConfig) -> "PILImage":
        """Return a new PILImage resized per ``config`` and carrying ``config``.

        Only resizing happens here; format conversion is applied when the
        returned object's ``read()`` is called.
        """
        transformed = self._resize(self.image, config)
        return PILImage(transformed, config)

    def _resize(
        self, image: PILImageLib.Image, config: ImageConfig
    ) -> PILImageLib.Image:
        """Resize ``image`` to the exact size requested by ``config``.

        A dimension left unset (``None`` or ``0``) keeps the original value;
        when both are unset the image is returned untouched.
        """
        if config.width or config.height:
            target_width = config.width or image.width
            target_height = config.height or image.height

            # Map the resample name from the config to the PIL constant.
            resample_map = {
                "NEAREST": PILImageLib.NEAREST,
                "BILINEAR": PILImageLib.BILINEAR,
                "BICUBIC": PILImageLib.BICUBIC,
                "LANCZOS": PILImageLib.LANCZOS,
            }
            resample = resample_map[config.resample]

            return image.resize((target_width, target_height), resample)
        return image

    @classmethod
    def _prepare_for_jpeg(cls, image: PILImageLib.Image) -> PILImageLib.Image:
        """Return an image in a mode the JPEG encoder can write.

        Images with transparency are flattened onto a white background; other
        unsupported modes (e.g. palette ``P``) are converted to RGB. Images
        already in a JPEG-compatible mode are returned unchanged.
        """
        has_alpha = image.mode in cls._ALPHA_MODES or (
            image.mode == "P" and "transparency" in image.info
        )
        if has_alpha:
            rgba = image if image.mode == "RGBA" else image.convert("RGBA")
            flattened = PILImageLib.new("RGB", rgba.size, (255, 255, 255))
            # Use the alpha band as the paste mask so transparent areas stay white.
            flattened.paste(rgba, mask=rgba.split()[3])
            return flattened
        if image.mode not in cls._JPEG_MODES:
            return image.convert("RGB")
        return image

    def _convert_format(self, image: PILImageLib.Image) -> bytes:
        """Encode ``image`` in the configured format (PNG or JPEG)."""
        buffer = io.BytesIO()
        save_kwargs = {"format": self.config.format}

        if self.config.format == "JPEG":
            save_kwargs["quality"] = self.config.quality
            self._prepare_for_jpeg(image).save(buffer, **save_kwargs)
        elif self.config.format == "PNG":
            save_kwargs["optimize"] = self.config.optimize
            image.save(buffer, **save_kwargs)

        return buffer.getvalue()

    def read(self) -> bytes:
        """Return the encoded image bytes (implements the Image protocol).

        The encoding is computed on first call and cached afterwards.
        """
        if self._cached_bytes is None:
            self._cached_bytes = self._convert_format(self.image)
        return self._cached_bytes
@@ -0,0 +1,41 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from typing import Optional
10
+
11
+ from .pil_image import PILImage
12
+ from .types import Image
13
+ from .types.models.image_config import ImageConfig
14
+
15
+
16
class ScreenshotMaker:
    """Callable image provider that captures screenshots via ``PILImage``.

    Each call grabs a fresh screenshot, applies the configured transformation,
    and remembers the result so ``last_image()`` can return it later.
    """

    def __init__(self, config: ImageConfig | None = None):
        """Store ``config``, falling back to a default ``ImageConfig``."""
        self._last_image: Optional[PILImage] = None
        self.config = config if config else ImageConfig()

    def __call__(self) -> Image:
        """Capture a screenshot, transform it per ``config``, and remember it."""
        shot = PILImage.from_screenshot()
        # Transform only when a config is present; otherwise keep the raw capture.
        self._last_image = shot.transform(self.config) if self.config else shot
        return self._last_image

    def last_image(self) -> Image:
        """Return the most recent screenshot, capturing one if none exists yet."""
        return self() if self._last_image is None else self._last_image
@@ -16,6 +16,14 @@ logger = get_logger("short_task")
16
16
  class ShortTask(Task):
17
17
  """Task implementation with automatic mode for short-duration tasks."""
18
18
 
19
def __init__(
    self,
    api_key: str | None = None,
    base_url: str | None = None,
    model: str = "vision-model-v1",
):
    """Initialize the task by forwarding all arguments to the parent class.

    Args:
        api_key: Passed through to the parent initializer unchanged.
        base_url: Passed through to the parent initializer unchanged.
        model: Model name passed through to the parent initializer.
    """
    forwarded = {"api_key": api_key, "base_url": base_url, "model": model}
    super().__init__(**forwarded)
26
+
19
27
  def auto_mode(
20
28
  self,
21
29
  task_desc: str,
@@ -8,6 +8,7 @@
8
8
 
9
9
  from pathlib import Path
10
10
 
11
+ from .pil_image import PILImage
11
12
  from .task import Task
12
13
  from .types import Image, Step
13
14
 
@@ -59,12 +60,12 @@ def single_step(
59
60
  ... screenshot=image
60
61
  ... )
61
62
  """
62
- # Convert file paths to bytes
63
+ # Convert file paths to bytes using PILImage
63
64
  if isinstance(screenshot, (str, Path)):
64
65
  path = Path(screenshot) if isinstance(screenshot, str) else screenshot
65
66
  if path.exists():
66
- with open(path, "rb") as f:
67
- screenshot_bytes = f.read()
67
+ pil_image = PILImage.from_file(str(path))
68
+ screenshot_bytes = pil_image.read()
68
69
  else:
69
70
  raise FileNotFoundError(f"Screenshot file not found: {path}")
70
71
  elif isinstance(screenshot, bytes):
@@ -8,8 +8,10 @@
8
8
 
9
9
  import base64
10
10
  import os
11
+ from functools import wraps
11
12
 
12
13
  import httpx
14
+ from httpx import Response
13
15
  from pydantic import BaseModel
14
16
 
15
17
  from .exceptions import (
@@ -63,6 +65,27 @@ class LLMResponse(BaseModel):
63
65
  error: ErrorDetail | None = None
64
66
 
65
67
 
68
def _log_trace_id(response: Response):
    """Log the request and trace IDs from ``response`` headers at error level.

    A missing header is logged as an empty string.
    """
    request_id = response.headers.get("x-request-id", "")
    logger.error(f"Request Id: {request_id}")
    trace_id = response.headers.get("x-trace-id", "")
    logger.error(f"Trace Id: {trace_id}")
71
+
72
+
73
def log_trace_on_failure(func):
    """Decorate ``func`` so request/trace IDs are logged when it raises.

    If the raised exception carries a non-``None`` ``response`` attribute, its
    IDs are logged via ``_log_trace_id``. The exception is always re-raised
    unchanged; successful calls return their result untouched.
    """

    @wraps(func)
    def traced(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except Exception as exc:
            # Not every exception carries a response; log only when one does.
            failed_response = getattr(exc, "response", None)
            if failed_response is not None:
                _log_trace_id(failed_response)
            raise
        return result

    return traced
87
+
88
+
66
89
  class SyncClient:
67
90
  def __init__(self, base_url: str | None = None, api_key: str | None = None):
68
91
  # Get from environment if not provided
@@ -98,6 +121,7 @@ class SyncClient:
98
121
  """Close the underlying httpx client"""
99
122
  self.client.close()
100
123
 
124
+ @log_trace_on_failure
101
125
  def create_message(
102
126
  self,
103
127
  model: str,
@@ -16,13 +16,18 @@ logger = get_logger("task")
16
16
  class Task:
17
17
  """Base class for task automation with the OAGI API."""
18
18
 
19
- def __init__(self, api_key: str | None = None, base_url: str | None = None):
19
+ def __init__(
20
+ self,
21
+ api_key: str | None = None,
22
+ base_url: str | None = None,
23
+ model: str = "vision-model-v1",
24
+ ):
20
25
  self.client = SyncClient(base_url=base_url, api_key=api_key)
21
26
  self.api_key = self.client.api_key
22
27
  self.base_url = self.client.base_url
23
28
  self.task_id: str | None = None
24
29
  self.task_description: str | None = None
25
- self.model = "vision-model-v1" # default model
30
+ self.model = model
26
31
 
27
32
  def init_task(self, task_desc: str, max_steps: int = 5):
28
33
  """Initialize a new task with the given description."""
@@ -9,6 +9,14 @@
9
9
  from .action_handler import ActionHandler
10
10
  from .image import Image
11
11
  from .image_provider import ImageProvider
12
- from .models import Action, ActionType, Step
12
+ from .models import Action, ActionType, ImageConfig, Step
13
13
 
14
- __all__ = ["Action", "ActionType", "Image", "Step", "ActionHandler", "ImageProvider"]
14
+ __all__ = [
15
+ "Action",
16
+ "ActionType",
17
+ "Image",
18
+ "ImageConfig",
19
+ "Step",
20
+ "ActionHandler",
21
+ "ImageProvider",
22
+ ]
@@ -7,6 +7,7 @@
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
9
  from .action import Action, ActionType
10
+ from .image_config import ImageConfig
10
11
  from .step import Step
11
12
 
12
- __all__ = ["Action", "ActionType", "Step"]
13
+ __all__ = ["Action", "ActionType", "ImageConfig", "Step"]
@@ -0,0 +1,47 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ from typing import Literal
10
+
11
+ from pydantic import BaseModel, Field, field_validator
12
+
13
+
14
class ImageConfig(BaseModel):
    """Settings that control how an image is resized and encoded.

    Defaults describe a 1260x700 JPEG at quality 85 resized with LANCZOS.
    """

    # Declared before ``quality`` so the quality validator can read it from
    # ``info.data`` (pydantic exposes previously validated fields there).
    format: Literal["PNG", "JPEG"] = Field(
        "JPEG", description="Image format for encoding"
    )
    quality: int = Field(
        85,
        description="JPEG quality (1-100, only applies to JPEG format)",
        ge=1,
        le=100,
    )
    # ``None`` for width or height means "keep the original dimension".
    width: int | None = Field(
        1260, description="Target width in pixels (will resize to exact size)"
    )
    height: int | None = Field(
        700, description="Target height in pixels (will resize to exact size)"
    )
    optimize: bool = Field(
        False,
        description="Enable PNG optimization (only applies to PNG format)",
    )
    resample: Literal["NEAREST", "BILINEAR", "BICUBIC", "LANCZOS"] = Field(
        "LANCZOS", description="Resampling filter for resizing"
    )

    @field_validator("quality")
    @classmethod
    def validate_quality(cls, v: int, info) -> int:
        """Normalize ``quality`` to 85 for PNG; leave it unchanged otherwise."""
        is_png = info.data.get("format") == "PNG"
        return 85 if is_png else v