oagi-core 0.13.2__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oagi/__init__.py CHANGED
@@ -81,6 +81,7 @@ _LAZY_IMPORTS_DATA: dict[str, tuple[str, str | None, str | None]] = {
81
81
  "screeninfo",
82
82
  "desktop",
83
83
  ),
84
+ "ScreenManager": ("oagi.handler.screen_manager", None, None),
84
85
  }
85
86
 
86
87
  if TYPE_CHECKING:
@@ -95,6 +96,7 @@ if TYPE_CHECKING:
95
96
  PyautoguiActionHandler,
96
97
  PyautoguiConfig,
97
98
  )
99
+ from oagi.handler.screen_manager import ScreenManager
98
100
  from oagi.handler.screenshot_maker import ScreenshotMaker
99
101
  from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
100
102
  from oagi.server.config import ServerConfig
@@ -170,4 +172,6 @@ __all__ = [
170
172
  "AsyncYdotoolActionHandler",
171
173
  "YdotoolActionHandler",
172
174
  "YdotoolConfig",
175
+ # Lazy imports - Screen manager
176
+ "ScreenManager",
173
177
  ]
oagi/cli/agent.py CHANGED
@@ -86,6 +86,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
86
86
  type=float,
87
87
  help=f"Delay in seconds after each step before next screenshot (default: {DEFAULT_STEP_DELAY})",
88
88
  )
89
+ run_parser.add_argument(
90
+ "--screen-index",
91
+ type=int,
92
+ help="Choose the index of screen to run the task",
93
+ )
89
94
 
90
95
  # agent modes command
91
96
  agent_subparsers.add_parser("modes", help="List available agent modes")
@@ -96,6 +101,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
96
101
  help="Check macOS permissions for screen recording and accessibility",
97
102
  )
98
103
 
104
+ # agent screens command
105
+ agent_subparsers.add_parser(
106
+ "screens", help="List all available screens for agent execution"
107
+ )
108
+
99
109
 
100
110
  def handle_agent_command(args: argparse.Namespace) -> None:
101
111
  if args.agent_command == "run":
@@ -104,6 +114,19 @@ def handle_agent_command(args: argparse.Namespace) -> None:
104
114
  list_modes()
105
115
  elif args.agent_command == "permission":
106
116
  check_permissions()
117
+ elif args.agent_command == "screens":
118
+ list_screens()
119
+
120
+
121
+ def list_screens() -> None:
122
+ """List all available screens for agent execution."""
123
+ from oagi import ScreenManager # noqa: PLC0415
124
+
125
+ screen_manager = ScreenManager()
126
+ screens = screen_manager.get_all_screens()
127
+ print("Available screens:")
128
+ for screen_index, screen in enumerate(screens):
129
+ print(f" - Index {screen_index}: {screen}")
107
130
 
108
131
 
109
132
  def list_modes() -> None:
@@ -212,6 +235,22 @@ def run_agent(args: argparse.Namespace) -> None:
212
235
  from oagi.agent import create_agent # noqa: PLC0415
213
236
  from oagi.handler.wayland_support import is_wayland_display_server # noqa: PLC0415
214
237
 
238
+ # Create screen manager for multi-screen support
239
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
240
+ target_screen = None
241
+ if args.screen_index is not None:
242
+ from oagi.handler import ScreenManager # noqa: PLC0415
243
+
244
+ screen_index = args.screen_index
245
+ screen_manager = ScreenManager()
246
+ all_screens = screen_manager.get_all_screens()
247
+ if screen_index >= len(all_screens) or screen_index < 0:
248
+ raise ValueError(
249
+ f"Error: Screen index {screen_index} not found. Available screen indices: {list(range(len(all_screens)))}"
250
+ )
251
+ target_screen = all_screens[screen_index]
252
+ print(f"Target screen: {target_screen}")
253
+
215
254
  # Select appropriate action handler based on display server
216
255
  if is_wayland_display_server():
217
256
  check_optional_dependency("screeninfo", "Agent execution (Wayland)", "desktop")
@@ -280,6 +319,11 @@ def run_agent(args: argparse.Namespace) -> None:
280
319
  # Create image provider
281
320
  image_provider = AsyncScreenshotMaker()
282
321
 
322
+ if target_screen:
323
+ # Set the target screen for the image and action provider
324
+ image_provider.set_target_screen(target_screen)
325
+ action_handler.set_target_screen(target_screen)
326
+
283
327
  if args.instruction:
284
328
  print(f"Starting agent with instruction: {args.instruction}")
285
329
  else:
oagi/constants.py CHANGED
@@ -35,7 +35,7 @@ DEFAULT_REFLECTION_INTERVAL = 4
35
35
  DEFAULT_REFLECTION_INTERVAL_TASKER = 20
36
36
 
37
37
  # Timing & Delays
38
- DEFAULT_STEP_DELAY = 0.3
38
+ DEFAULT_STEP_DELAY = 1.0
39
39
 
40
40
  # Temperature Defaults
41
41
  DEFAULT_TEMPERATURE = 0.5
oagi/handler/__init__.py CHANGED
@@ -21,6 +21,7 @@ _LAZY_IMPORTS: dict[str, str] = {
21
21
  "AsyncYdotoolActionHandler": "oagi.handler.async_ydotool_action_handler",
22
22
  "YdotoolActionHandler": "oagi.handler.ydotool_action_handler",
23
23
  "YdotoolConfig": "oagi.handler.ydotool_action_handler",
24
+ "ScreenManager": "oagi.handler.screen_manager",
24
25
  }
25
26
 
26
27
  if TYPE_CHECKING:
@@ -32,6 +33,7 @@ if TYPE_CHECKING:
32
33
  PyautoguiActionHandler,
33
34
  PyautoguiConfig,
34
35
  )
36
+ from oagi.handler.screen_manager import ScreenManager
35
37
  from oagi.handler.screenshot_maker import ScreenshotMaker
36
38
  from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
37
39
 
@@ -60,4 +62,5 @@ __all__ = [
60
62
  "YdotoolConfig",
61
63
  "YdotoolActionHandler",
62
64
  "AsyncYdotoolActionHandler",
65
+ "ScreenManager",
63
66
  ]
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Action
12
14
  from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
13
15
 
@@ -29,6 +31,14 @@ class AsyncPyautoguiActionHandler:
29
31
  self.sync_handler = PyautoguiActionHandler(config=config)
30
32
  self.config = config or PyautoguiConfig()
31
33
 
34
+ def set_target_screen(self, screen: Screen) -> None:
35
+ """Set the target screen for the action handler.
36
+
37
+ Args:
38
+ screen (Screen): The screen object to set as the target.
39
+ """
40
+ self.sync_handler.set_target_screen(screen)
41
+
32
42
  def reset(self):
33
43
  """Reset handler state.
34
44
 
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Image, ImageConfig
12
14
  from .screenshot_maker import ScreenshotMaker
13
15
 
@@ -29,6 +31,9 @@ class AsyncScreenshotMaker:
29
31
  self.sync_screenshot_maker = ScreenshotMaker(config=config)
30
32
  self.config = config
31
33
 
34
+ def set_target_screen(self, screen: Screen) -> None:
35
+ self.sync_screenshot_maker.set_target_screen(screen)
36
+
32
37
  async def __call__(self) -> Image:
33
38
  """
34
39
  Capture a screenshot asynchronously using a thread pool executor.
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Action
12
14
  from .ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
13
15
 
@@ -29,6 +31,14 @@ class AsyncYdotoolActionHandler:
29
31
  self.config = config or YdotoolConfig()
30
32
  self.sync_handler = YdotoolActionHandler(config=self.config)
31
33
 
34
+ def set_target_screen(self, screen: Screen) -> None:
35
+ """Set the target screen for the action handler.
36
+
37
+ Args:
38
+ screen (Screen): The screen object to set as the target.
39
+ """
40
+ self.sync_handler.set_target_screen(screen)
41
+
32
42
  def reset(self):
33
43
  """Reset handler state.
34
44
 
oagi/handler/pil_image.py CHANGED
@@ -7,6 +7,7 @@
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
9
  import io
10
+ import sys
10
11
 
11
12
  from ..exceptions import check_optional_dependency
12
13
  from ..types.models.image_config import ImageConfig
@@ -39,17 +40,47 @@ class PILImage:
39
40
  return cls(image, config)
40
41
 
41
42
  @classmethod
42
- def from_screenshot(cls, config: ImageConfig | None = None) -> "PILImage":
43
- """Create PILImage from screenshot."""
43
+ def from_screenshot(
44
+ cls,
45
+ config: ImageConfig | None = None,
46
+ region: tuple[int, int, int, int] | None = None,
47
+ ) -> "PILImage":
48
+ """Create PILImage from screenshot.
49
+
50
+ Args:
51
+ config: ImageConfig for transformations
52
+ region: Optional (x, y, width, height) tuple for cropping
53
+ """
44
54
  # Use flameshot by default in Wayland display environment
45
55
  if is_wayland_display_server():
46
- return cls(wayland_screenshot(), config)
56
+ return cls(wayland_screenshot(region=region), config)
47
57
 
48
58
  # Lazy import to avoid DISPLAY issues in headless environments
49
59
  check_optional_dependency("pyautogui", "PILImage.from_screenshot()", "desktop")
50
60
  import pyautogui # noqa: PLC0415
51
61
 
52
- screenshot = pyautogui.screenshot()
62
+ if sys.platform == "win32" and region is not None:
63
+ # Use mss instead of pyautogui for screenshots in multi-monitor Windows setups
64
+ import mss # noqa: PLC0415
65
+
66
+ with mss.mss() as sct:
67
+ screenshot_data = sct.grab(
68
+ {
69
+ "top": region[1],
70
+ "left": region[0],
71
+ "width": region[2],
72
+ "height": region[3],
73
+ }
74
+ )
75
+ screenshot = PILImageLib.frombytes(
76
+ "RGB",
77
+ screenshot_data.size,
78
+ screenshot_data.bgra,
79
+ "raw",
80
+ "BGRX",
81
+ )
82
+ else:
83
+ screenshot = pyautogui.screenshot(region=region)
53
84
  return cls(screenshot, config)
54
85
 
55
86
  def transform(self, config: ImageConfig) -> "PILImage":
@@ -11,6 +11,8 @@ import time
11
11
 
12
12
  from pydantic import BaseModel, Field
13
13
 
14
+ from oagi.handler.screen_manager import Screen
15
+
14
16
  from ..exceptions import check_optional_dependency
15
17
  from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
16
18
  from .capslock_manager import CapsLockManager
@@ -81,6 +83,8 @@ class PyautoguiActionHandler:
81
83
  pyautogui.PAUSE = self.config.action_pause
82
84
  # Initialize caps lock manager
83
85
  self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
86
+ # The origin position of coordinates (the top-left corner of the target screen)
87
+ self.origin_x, self.origin_y = 0, 0
84
88
 
85
89
  def reset(self):
86
90
  """Reset handler state.
@@ -90,6 +94,15 @@ class PyautoguiActionHandler:
90
94
  """
91
95
  self.caps_manager.reset()
92
96
 
97
+ def set_target_screen(self, screen: Screen) -> None:
98
+ """Set the target screen for the action handler.
99
+
100
+ Args:
101
+ screen (Screen): The screen object to set as the target.
102
+ """
103
+ self.screen_width, self.screen_height = screen.width, screen.height
104
+ self.origin_x, self.origin_y = screen.x, screen.y
105
+
93
106
  def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
94
107
  """Convert coordinates from 0-1000 range to actual screen coordinates.
95
108
 
@@ -111,6 +124,10 @@ class PyautoguiActionHandler:
111
124
  elif screen_y > self.screen_height - 1:
112
125
  screen_y = self.screen_height - 1
113
126
 
127
+ # Add origin offset to convert relative to top-left corner
128
+ screen_x += self.origin_x
129
+ screen_y += self.origin_y
130
+
114
131
  return screen_x, screen_y
115
132
 
116
133
  def _parse_coords(self, args_str: str) -> tuple[int, int]:
@@ -0,0 +1,187 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+
10
+ import sys
11
+ from dataclasses import dataclass
12
+
13
+ from oagi.exceptions import check_optional_dependency
14
+
15
+ # Guard flag to prevent multiple DPI awareness calls on Windows
16
+ _dpi_awareness_set = False
17
+
18
+
19
+ @dataclass
20
+ class Screen:
21
+ """
22
+ Screen represents a single display screen.
23
+
24
+ Attributes:
25
+ name (str): The name of the screen.
26
+ x (int): The x-coordinate of the top-left corner of the screen.
27
+ y (int): The y-coordinate of the top-left corner of the screen.
28
+ width (int): The width of the screen in pixels.
29
+ height (int): The height of the screen in pixels.
30
+ is_primary (bool): True if this is the primary screen, False otherwise.
31
+ """
32
+
33
+ name: str
34
+ x: int
35
+ y: int
36
+ width: int
37
+ height: int
38
+ is_primary: bool = False
39
+
40
+
41
+ class ScreenManager:
42
+ """
43
+ ScreenManager is responsible for detecting and managing screens.
44
+ """
45
+
46
+ def __init__(self):
47
+ self.screens = []
48
+ # Enable DPI awareness if on Windows
49
+ if sys.platform == "win32":
50
+ self.enable_windows_dpi_awareness()
51
+
52
+ def get_all_screens(self) -> list[Screen]:
53
+ if self.screens:
54
+ return self.screens
55
+ if sys.platform == "darwin":
56
+ screens = self._get_darwin_screen_info()
57
+ elif sys.platform == "win32":
58
+ screens = self._get_windows_screen_info()
59
+ else:
60
+ screens = self._get_linux_screen_info()
61
+ # Find the primary screen
62
+ primary_screen, alternative_screens = None, []
63
+ for screen in screens:
64
+ if screen.is_primary:
65
+ primary_screen = screen
66
+ else:
67
+ alternative_screens.append(screen)
68
+ # order the alternative_screens by x coordinate ascending and y coordinate ascending
69
+ alternative_screens = sorted(
70
+ alternative_screens, key=lambda item: (item.x, item.y)
71
+ )
72
+ # Add the primary screen to the front of the list if it exists
73
+ if primary_screen:
74
+ self.screens = [primary_screen]
75
+ self.screens += alternative_screens
76
+ return self.screens
77
+
78
+ def _get_darwin_screen_info(self) -> list[Screen]:
79
+ """
80
+ Get screen information for macOS using AppKit.
81
+
82
+ Returns:
83
+ list[Screen]: A list of Screen objects representing all detected screens.
84
+ """
85
+ check_optional_dependency("AppKit", "ScreenManager", "desktop")
86
+ import AppKit # noqa: PLC0415
87
+
88
+ # Force the RunLoop to update once
89
+ # This "accepts input" which forces macOS to update screen geometry
90
+ loop = AppKit.NSRunLoop.currentRunLoop()
91
+ loop.acceptInputForMode_beforeDate_(
92
+ AppKit.NSDefaultRunLoopMode, AppKit.NSDate.distantPast()
93
+ )
94
+ # Retrieve screen information using AppKit
95
+ screens = AppKit.NSScreen.screens()
96
+ screen_list = []
97
+ for screen in screens:
98
+ frame = screen.frame()
99
+ # Origin (0,0) is bottom-left of the primary screen
100
+ x, y = int(frame.origin.x), int(frame.origin.y)
101
+ width, height = int(frame.size.width), int(frame.size.height)
102
+ name = screen.localizedName()
103
+ # Normalize the origin to Top-Left
104
+ y = int(AppKit.NSScreen.screens()[0].frame().size.height) - (y + height)
105
+ screen_list.append(Screen(name, x, y, width, height, x == 0 and y == 0))
106
+ return screen_list
107
+
108
+ def _get_windows_screen_info(self) -> list[Screen]:
109
+ """
110
+ Get screen information for Windows using mss.
111
+
112
+ Returns:
113
+ list[Screen]: A list of Screen objects representing all detected screens.
114
+ """
115
+ check_optional_dependency("mss", "ScreenManager", "desktop")
116
+ import mss # noqa: PLC0415
117
+
118
+ screen_list = []
119
+ for index, screen in enumerate(mss.mss().monitors[1:]):
120
+ screen_list.append(
121
+ Screen(
122
+ f"DISPLAY{index}",
123
+ screen["left"],
124
+ screen["top"],
125
+ screen["width"],
126
+ screen["height"],
127
+ screen["top"] == 0 and screen["left"] == 0,
128
+ )
129
+ )
130
+ return screen_list
131
+
132
+ def _get_linux_screen_info(self) -> list[Screen]:
133
+ """
134
+ Get screen information for Linux and other platforms as default.
135
+
136
+ Returns:
137
+ list[Screen]: A list of Screen objects representing all detected screens.
138
+ """
139
+ check_optional_dependency("screeninfo", "ScreenManager", "desktop")
140
+ import screeninfo # noqa: PLC0415
141
+
142
+ screen_list = []
143
+ for screen in screeninfo.get_monitors():
144
+ screen_list.append(
145
+ Screen(
146
+ screen.name,
147
+ screen.x,
148
+ screen.y,
149
+ screen.width,
150
+ screen.height,
151
+ screen.is_primary,
152
+ )
153
+ )
154
+ return screen_list
155
+
156
+ @staticmethod
157
+ def enable_windows_dpi_awareness():
158
+ """
159
+ Enable per-monitor DPI awareness to fix multi-monitor scaling issues.
160
+
161
+ On Windows with mixed scaling between monitors, applications that are not
162
+ DPI-aware will have their coordinates virtualized, causing clicks/moves to
163
+ land at incorrect positions. Enabling DPI awareness ensures PyAutoGUI and mss
164
+ work in physical pixels across all monitors.
165
+
166
+ This method is idempotent - subsequent calls after the first successful call
167
+ will be no-ops.
168
+ """
169
+ global _dpi_awareness_set
170
+ if _dpi_awareness_set:
171
+ return
172
+
173
+ import ctypes # noqa: PLC0415
174
+
175
+ try:
176
+ # For Windows 8.1 and Windows 10/11
177
+ # 2 = PROCESS_PER_MONITOR_DPI_AWARE
178
+ PROCESS_PER_MONITOR_DPI_AWARE = 2
179
+ ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
180
+ _dpi_awareness_set = True
181
+ except Exception:
182
+ try:
183
+ # Fallback for older Windows versions
184
+ ctypes.windll.user32.SetProcessDPIAware()
185
+ _dpi_awareness_set = True
186
+ except Exception:
187
+ raise RuntimeError("Could not set DPI awareness")
@@ -8,6 +8,8 @@
8
8
 
9
9
  from typing import Optional
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Image
12
14
  from ..types.models.image_config import ImageConfig
13
15
  from .pil_image import PILImage
@@ -19,11 +21,16 @@ class ScreenshotMaker:
19
21
  def __init__(self, config: ImageConfig | None = None):
20
22
  self.config = config or ImageConfig()
21
23
  self._last_image: Optional[PILImage] = None
24
+ self.region: Optional[tuple[int, int, int, int]] = None
25
+
26
+ def set_target_screen(self, screen: Screen) -> None:
27
+ """Set the target screen for screenshotting."""
28
+ self.region = (screen.x, screen.y, screen.width, screen.height)
22
29
 
23
30
  def __call__(self) -> Image:
24
31
  """Take and process a screenshot."""
25
32
  # Create PILImage from screenshot
26
- pil_image = PILImage.from_screenshot()
33
+ pil_image = PILImage.from_screenshot(region=self.region)
27
34
 
28
35
  # Apply transformation if config is set
29
36
  if self.config:
@@ -43,7 +43,7 @@ def get_screen_size() -> tuple[int, int]:
43
43
  raise Exception("No monitor found, cannot get the screen size info")
44
44
 
45
45
 
46
- def screenshot() -> Image:
46
+ def screenshot(region: tuple[int, int, int, int] | None = None) -> Image:
47
47
  """
48
48
  Use Flameshot to take a screenshot and return an Image object
49
49
 
@@ -52,7 +52,11 @@ def screenshot() -> Image:
52
52
  # Check if flameshot is installed
53
53
  if shutil.which("flameshot") is None:
54
54
  raise RuntimeError("flameshot not found. Ensure it is installed and in PATH.")
55
- cmd = ["flameshot", "full", "--region", "all", "--raw"]
55
+ cmd = ["flameshot", "full", "--raw"]
56
+ if region:
57
+ cmd.extend(["--region", f"{region[2]}x{region[3]}+{region[0]}+{region[1]}"])
58
+ else:
59
+ cmd.extend(["--region", "all"])
56
60
  res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
57
61
  if res.returncode != 0:
58
62
  raise RuntimeError(
@@ -10,6 +10,8 @@ import time
10
10
 
11
11
  from pydantic import BaseModel, Field
12
12
 
13
+ from oagi.handler.screen_manager import Screen
14
+
13
15
  from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
14
16
  from .capslock_manager import CapsLockManager
15
17
  from .wayland_support import Ydotool, get_screen_size
@@ -62,6 +64,8 @@ class YdotoolActionHandler(Ydotool):
62
64
  self.action_pause = self.config.action_pause
63
65
  # Initialize caps lock manager
64
66
  self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
67
+ # The origin position of coordinates (the top-left corner of the screen)
68
+ self.origin_x, self.origin_y = 0, 0
65
69
 
66
70
  def reset(self):
67
71
  """Reset handler state.
@@ -71,6 +75,15 @@ class YdotoolActionHandler(Ydotool):
71
75
  """
72
76
  self.caps_manager.reset()
73
77
 
78
+ def set_target_screen(self, screen: Screen) -> None:
79
+ """Set the target screen for the action handler.
80
+
81
+ Args:
82
+ screen (Screen): The screen object to set as the target.
83
+ """
84
+ self.screen_width, self.screen_height = screen.width, screen.height
85
+ self.origin_x, self.origin_y = screen.x, screen.y
86
+
74
87
  def _execute_action(self, action: Action) -> bool:
75
88
  """
76
89
  Execute a group of actions and return whether FINISH is reached.
@@ -168,6 +181,10 @@ class YdotoolActionHandler(Ydotool):
168
181
  elif screen_y > self.screen_height - 1:
169
182
  screen_y = self.screen_height - 1
170
183
 
184
+ # Add origin offset to convert relative to top-left corner
185
+ screen_x += self.origin_x
186
+ screen_y += self.origin_y
187
+
171
188
  return screen_x, screen_y
172
189
 
173
190
  def _normalize_key(self, key: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: oagi-core
3
- Version: 0.13.2
3
+ Version: 0.14.0
4
4
  Summary: Official API of OpenAGI Foundation
5
5
  Project-URL: Homepage, https://github.com/agiopen-org/oagi
6
6
  Author-email: OpenAGI Foundation <contact@agiopen.org>
@@ -31,6 +31,7 @@ Requires-Dist: openai>=1.3.0
31
31
  Requires-Dist: pydantic>=2.0.0
32
32
  Requires-Dist: rich>=10.0.0
33
33
  Provides-Extra: desktop
34
+ Requires-Dist: mss>=9.0.0; (sys_platform == 'win32') and extra == 'desktop'
34
35
  Requires-Dist: pillow>=9.0.0; extra == 'desktop'
35
36
  Requires-Dist: pyautogui>=0.9.54; extra == 'desktop'
36
37
  Requires-Dist: pyobjc-framework-applicationservices>=8.0; (sys_platform == 'darwin') and extra == 'desktop'
@@ -184,8 +185,14 @@ oagi agent modes
184
185
  # Check macOS permissions (screen recording & accessibility)
185
186
  oagi agent permission
186
187
 
188
+ # Print all available screens and their indices
189
+ oagi agent screens
190
+
187
191
  # Export execution history
188
192
  oagi agent run "Complete the form" --export html --export-file report.html
193
+
194
+ # Run with a specific screen
195
+ oagi agent run "Search weather on Google" --screen-index 1
189
196
  ```
190
197
 
191
198
  CLI options:
@@ -196,6 +203,7 @@ CLI options:
196
203
  - `--step-delay`: Delay after each action before next screenshot (default: 1.0s)
197
204
  - `--export`: Export format (markdown, html, json)
198
205
  - `--export-file`: Output file path for export
206
+ - `--screen-index`: Screen index for multi-screen environments
199
207
 
200
208
  ### Image Processing
201
209
 
@@ -280,11 +288,61 @@ config = YdotoolConfig(
280
288
  action_handler = AsyncYdotoolActionHandler(config=config)
281
289
  ```
282
290
 
291
+ ### Multi-Screen Execution
292
+ When running in a multi-screen environment, you can choose which screen to use for task execution. The `ScreenManager` class lists the available screens, while the `AsyncPyautoguiActionHandler` and `AsyncScreenshotMaker` classes let you set the target screen for actions and screenshots. In the list returned by `get_all_screens`, the primary screen (when one is detected) always comes first, and the remaining screens follow in ascending order of their origin coordinates (x first, then y).
293
+
294
+ ```python
295
+ import asyncio
296
+ import sys
297
+ from oagi import ScreenManager
298
+
299
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
300
+ if sys.platform == "win32":
301
+ ScreenManager.enable_windows_dpi_awareness()
302
+
303
+ from oagi import (
304
+ AsyncDefaultAgent,
305
+ AsyncPyautoguiActionHandler,
306
+ AsyncScreenshotMaker,
307
+ )
308
+
309
+ def print_all_screens():
310
+ """Print all available screens."""
311
+ screen_manager = ScreenManager()
312
+ all_screens = screen_manager.get_all_screens()
313
+ print("Available screens:")
314
+ for screen_index, screen in enumerate(all_screens):
315
+ print(f" - Index {screen_index}: {screen}")
316
+
317
+ async def main():
318
+ agent = AsyncDefaultAgent(max_steps=10)
319
+ action_handler = AsyncPyautoguiActionHandler()
320
+ image_provider = AsyncScreenshotMaker()
321
+ # Get all available screens
322
+ screen_manager = ScreenManager()
323
+ all_screens = screen_manager.get_all_screens()
324
+ # Choose a screen for task execution
325
+ screen_index = 1 # Use the second screen as example
326
+ target_screen = all_screens[screen_index]
327
+ # Set the target screen for handlers
328
+ action_handler.set_target_screen(target_screen)
329
+ image_provider.set_target_screen(target_screen)
330
+ completed = await agent.execute(
331
+ "Search weather on Google",
332
+ action_handler=action_handler,
333
+ image_provider=image_provider,
334
+ )
335
+ return completed
336
+
337
+ asyncio.run(main())
338
+ ```
339
+
283
340
  ## Examples
284
341
 
285
342
  See the [`examples/`](examples/) directory for more usage patterns:
286
343
  - `execute_task_auto.py` - Automated task execution with `AsyncDefaultAgent`
287
344
  - `execute_task_manual.py` - Manual step-by-step control with `Actor`
345
+ - `multi_screen_execution.py` - Automated task execution on multi-screen environments
288
346
  - `continued_session.py` - Continuing tasks across sessions
289
347
  - `screenshot_with_config.py` - Image compression and optimization
290
348
  - `socketio_server_basic.py` - Socket.IO server example
@@ -1,5 +1,5 @@
1
- oagi/__init__.py,sha256=ly956qE0F4FlzaU-kPsKAhpG2Kc3D9T_BJDZmWqioOc,5616
2
- oagi/constants.py,sha256=VUEgpwup55rTrvPC2xhtjZgpUHJlVaNRidiUlXXEjx0,1228
1
+ oagi/__init__.py,sha256=TVBbIMEXQh5Z8JumqrivMPBLYt3E5HmGPi973NDx2Qs,5797
2
+ oagi/constants.py,sha256=qcNisliLRZUr6krXpS49AAPfFYvkZuWQcAJ8ZdEnhic,1228
3
3
  oagi/exceptions.py,sha256=Rco37GQTPYUfc2vRO3hozxPF_s8mKFDpFvBg2UKWo3Y,3066
4
4
  oagi/logging.py,sha256=YT3KCMFj5fzO98R9xlDDgfSotUuz1xRD6OZeYM2rKoo,1760
5
5
  oagi/platform_info.py,sha256=GEqNWnwePszVEM21toGGi07o3PaX8O059CYRr0RUM_M,1424
@@ -27,7 +27,7 @@ oagi/agent/tasker/planner.py,sha256=q6IvH6sfU2kYX1NcC9VHjGaQ0X9jF18yjuAYXisNCg0,
27
27
  oagi/agent/tasker/taskee_agent.py,sha256=OugYJbTbFKxgNjbIyQBBH4Zm5u5PuWN1F6R81_eIro8,18090
28
28
  oagi/agent/tasker/tasker_agent.py,sha256=yb0BdQzJyAPpK3njHPWgQruV8zpUGBXn1WjOGEMIO-g,11291
29
29
  oagi/cli/__init__.py,sha256=aDnJViTseShpo5fdGPTj-ELysZhmdvB6Z8mEj2D-_N4,359
30
- oagi/cli/agent.py,sha256=lT95jgfzf-LizOWbzYfhq_EkcYcLvJH2uL-HKnYW_es,11533
30
+ oagi/cli/agent.py,sha256=CWvwwbo4eiq-USYmDHnKafEX8Nk6zsGsUmNzB4QftkQ,13186
31
31
  oagi/cli/display.py,sha256=Y8_Dn5RIEfRqZUHVGF6URItW0C3XC7bPLWoAmmhvBS0,1829
32
32
  oagi/cli/main.py,sha256=faHns0HaQCGyylDn2YZLpjQESuEiMYjoQVoMkt8FsH4,2292
33
33
  oagi/cli/server.py,sha256=JFpzCOeaftITxesz8Ya-_Efs03bgotBg7aYwmMZhPwU,3033
@@ -37,20 +37,21 @@ oagi/client/__init__.py,sha256=F9DShPUdb6vZYmN1fpM1VYzp4MWqUao_e_R1KYmM4Q4,410
37
37
  oagi/client/async_.py,sha256=BANE0KU14WBuXp6suBhr8JSlpWhN5SR2aJJ7wAJBDLQ,9574
38
38
  oagi/client/base.py,sha256=CWAvE0AcpL8HD_i00n7Fq53AIAQGhBhS_n6LifUCqxE,14736
39
39
  oagi/client/sync.py,sha256=4xNqqNihXmgLU385h22mMJ9wmmlw-jeOdWI4fmpEpTk,9369
40
- oagi/handler/__init__.py,sha256=rqxbj6Bp9MGndLBVhqRx8b-GSYR3PC5ktErMvd-HzRI,2397
40
+ oagi/handler/__init__.py,sha256=ZMQIeN_uJKUK_dn0w7ggsPfdRzzwts7G-Sppsrt22Lg,2528
41
41
  oagi/handler/_macos.py,sha256=Gs8GrhA_WAyv9Yw0D41duliP32Xk6vouyMeWjWJJT90,5187
42
42
  oagi/handler/_windows.py,sha256=MSgPDYEOetSjbn9eJDSrdzBVlUGgGsTlegaTDc4C4Ss,2828
43
43
  oagi/handler/_ydotool.py,sha256=WjvE6RGRm8j3SEWpgfMw31aow3z3qkiMupuUHYt-QAM,2948
44
- oagi/handler/async_pyautogui_action_handler.py,sha256=wfNRBBURZnwQkNTcs9OPMmFJIAPtnXmcqxWbjda_q7I,1863
45
- oagi/handler/async_screenshot_maker.py,sha256=8QCtUV59ozpOpvkqhUMb8QDI2qje2gsoFT1qB60tfJM,1689
46
- oagi/handler/async_ydotool_action_handler.py,sha256=BRGqZB2u1k7R1acUX9k0TfdrmWS2eh3opc8LoqnlwJ4,1848
44
+ oagi/handler/async_pyautogui_action_handler.py,sha256=ZYySg5dSu4MDX0ngwVUJaX3uZA2CuecPN2UqFnCbnec,2174
45
+ oagi/handler/async_screenshot_maker.py,sha256=_myV4Rq6X_evCOuatalFSW5nsUDXi_0ej0GQ7V4n3JE,1856
46
+ oagi/handler/async_ydotool_action_handler.py,sha256=HB4QQk3OaG08g37eLb3EwsnkWKWPrpDei0ZsnBxrGZY,2159
47
47
  oagi/handler/capslock_manager.py,sha256=40LzWt1_1wbncF5koUTdbd9V3eo5Ex_mEWwjtEmHAf4,1878
48
- oagi/handler/pil_image.py,sha256=GQw2o8ORQinrM3AxhgNBbLhrkZajOL8YagU7UF-kkes,4357
49
- oagi/handler/pyautogui_action_handler.py,sha256=P6YPgsXr3mQn_lh6rVLzWBomg9s5EIwbKZYbOgCAa5A,10640
50
- oagi/handler/screenshot_maker.py,sha256=j1jTW-awx3vAnb1N5_FIMBC0Z-rNVQbiBP-S6Gh5dlE,1284
48
+ oagi/handler/pil_image.py,sha256=s8UGZ6ALbmOxRO2GL1EUFN7_6ZEFseSE9OHABCe7wek,5380
49
+ oagi/handler/pyautogui_action_handler.py,sha256=HvupClYrmYuIx_xvXj5SyacDX781bpZkMGEOeXoSQTU,11301
50
+ oagi/handler/screen_manager.py,sha256=FV0-6ZyTVv9yZlAg4Krga0xW9O_LDsk1iaCJjWgET-g,6565
51
+ oagi/handler/screenshot_maker.py,sha256=740k7NjDRKW6KwVqy_nVoczgVuw9_yTKM0gLFKB1iNs,1600
51
52
  oagi/handler/utils.py,sha256=jj10z-v4_LUuVb8aClyXkUfZVEaqsWgi3be4t3Gw7oI,697
52
- oagi/handler/wayland_support.py,sha256=jeQDqpwAxxREaGAYePQuK14nuEMPGmMEvMz2ymS-rT4,7727
53
- oagi/handler/ydotool_action_handler.py,sha256=8cmOFaEsYMI5BiYdYRuUIaSXDNgrdk-B3OnHVbEa9Sk,8608
53
+ oagi/handler/wayland_support.py,sha256=qUIAQMqc3wp1VIypVmZjFDYT8t0yH0QvikTTV8pD-XA,7905
54
+ oagi/handler/ydotool_action_handler.py,sha256=y-mWXM88j3baPPywXqwASSx7GAs7LbqTgpCA1gN4nF0,9262
54
55
  oagi/server/__init__.py,sha256=uZx8u3vJUb87kkNzwmmVrgAgbqRu0WxyMIQCLSx56kk,452
55
56
  oagi/server/agent_wrappers.py,sha256=j8va0A7u80bzOM82nndAplK1uaO_T3kufHWScK6kfWM,3263
56
57
  oagi/server/config.py,sha256=AJ1PLKuxrc6pRuur1hm5DwG2g2otxPwOCfKgzIACkSk,1691
@@ -75,8 +76,8 @@ oagi/types/models/step.py,sha256=RSI4H_2rrUBq_xyCoWKaq7JHdJWNobtQppaKC1l0aWU,471
75
76
  oagi/utils/__init__.py,sha256=vHXyX66hEsf33OJJkmZSUjaTYU0UngfbtjcZgxfOj3A,441
76
77
  oagi/utils/output_parser.py,sha256=U7vzmoD8pyzDg23z3vy-L9a_jKPsAlr3x8lIdPszrY8,5322
77
78
  oagi/utils/prompt_builder.py,sha256=_Q1HY82YUrq3jSCTZ3Rszu3qmI3Wn_fmq8hf14NuwQM,2180
78
- oagi_core-0.13.2.dist-info/METADATA,sha256=DfZTkJyg0UmS4UH4V67rHhrsdfonwImQnSz7zOdI3QQ,14203
79
- oagi_core-0.13.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
80
- oagi_core-0.13.2.dist-info/entry_points.txt,sha256=zzgsOSWX6aN3KUB0Z1it8DMxFFBJBqmZVqMVAJRjYuw,44
81
- oagi_core-0.13.2.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
82
- oagi_core-0.13.2.dist-info/RECORD,,
79
+ oagi_core-0.14.0.dist-info/METADATA,sha256=Y7oBSQFVieDeBMwwVMhsrhMMtSPXjM7PIVm72SfTCtM,16508
80
+ oagi_core-0.14.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
81
+ oagi_core-0.14.0.dist-info/entry_points.txt,sha256=zzgsOSWX6aN3KUB0Z1it8DMxFFBJBqmZVqMVAJRjYuw,44
82
+ oagi_core-0.14.0.dist-info/licenses/LICENSE,sha256=sy5DLA2M29jFT4UfWsuBF9BAr3FnRkYtnAu6oDZiIf8,1075
83
+ oagi_core-0.14.0.dist-info/RECORD,,