oagi-core 0.13.2__tar.gz → 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {oagi_core-0.13.2 → oagi_core-0.14.0}/PKG-INFO +59 -1
  2. {oagi_core-0.13.2 → oagi_core-0.14.0}/README.md +57 -0
  3. oagi_core-0.14.0/examples/multi_screen_execution.py +74 -0
  4. {oagi_core-0.13.2 → oagi_core-0.14.0}/metapackage/pyproject.toml +2 -2
  5. {oagi_core-0.13.2 → oagi_core-0.14.0}/metapackage/uv.lock +5 -5
  6. {oagi_core-0.13.2 → oagi_core-0.14.0}/pyproject.toml +3 -2
  7. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/__init__.py +4 -0
  8. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/agent.py +44 -0
  9. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/constants.py +1 -1
  10. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/__init__.py +3 -0
  11. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/async_pyautogui_action_handler.py +10 -0
  12. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/async_screenshot_maker.py +5 -0
  13. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/async_ydotool_action_handler.py +10 -0
  14. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/pil_image.py +35 -4
  15. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/pyautogui_action_handler.py +17 -0
  16. oagi_core-0.14.0/src/oagi/handler/screen_manager.py +187 -0
  17. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/screenshot_maker.py +8 -1
  18. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/wayland_support.py +6 -2
  19. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/ydotool_action_handler.py +17 -0
  20. {oagi_core-0.13.2 → oagi_core-0.14.0}/uv.lock +12 -1
  21. {oagi_core-0.13.2 → oagi_core-0.14.0}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  22. {oagi_core-0.13.2 → oagi_core-0.14.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  23. {oagi_core-0.13.2 → oagi_core-0.14.0}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
  24. {oagi_core-0.13.2 → oagi_core-0.14.0}/.github/ISSUE_TEMPLATE/question.yml +0 -0
  25. {oagi_core-0.13.2 → oagi_core-0.14.0}/.github/workflows/ci.yml +0 -0
  26. {oagi_core-0.13.2 → oagi_core-0.14.0}/.github/workflows/release.yml +0 -0
  27. {oagi_core-0.13.2 → oagi_core-0.14.0}/.gitignore +0 -0
  28. {oagi_core-0.13.2 → oagi_core-0.14.0}/.python-version +0 -0
  29. {oagi_core-0.13.2 → oagi_core-0.14.0}/CONTRIBUTING.md +0 -0
  30. {oagi_core-0.13.2 → oagi_core-0.14.0}/LICENSE +0 -0
  31. {oagi_core-0.13.2 → oagi_core-0.14.0}/Makefile +0 -0
  32. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/async_google_weather.py +0 -0
  33. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/execute_task_auto.py +0 -0
  34. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/execute_task_manual.py +0 -0
  35. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/google_weather.py +0 -0
  36. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/openai_agent_loop_example.py +0 -0
  37. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/screenshot_with_config.py +0 -0
  38. {oagi_core-0.13.2 → oagi_core-0.14.0}/examples/tasker_agent_example.py +0 -0
  39. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/actor/__init__.py +0 -0
  40. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/actor/async_.py +0 -0
  41. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/actor/async_short.py +0 -0
  42. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/actor/base.py +0 -0
  43. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/actor/short.py +0 -0
  44. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/actor/sync.py +0 -0
  45. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/__init__.py +0 -0
  46. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/default.py +0 -0
  47. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/factories.py +0 -0
  48. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/observer/__init__.py +0 -0
  49. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/observer/agent_observer.py +0 -0
  50. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/observer/events.py +0 -0
  51. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/observer/exporters.py +0 -0
  52. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/observer/protocol.py +0 -0
  53. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/observer/report_template.html +0 -0
  54. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/protocol.py +0 -0
  55. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/registry.py +0 -0
  56. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/tasker/__init__.py +0 -0
  57. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/tasker/memory.py +0 -0
  58. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/tasker/models.py +0 -0
  59. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/tasker/planner.py +0 -0
  60. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/tasker/taskee_agent.py +0 -0
  61. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/agent/tasker/tasker_agent.py +0 -0
  62. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/__init__.py +0 -0
  63. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/display.py +0 -0
  64. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/main.py +0 -0
  65. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/server.py +0 -0
  66. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/tracking.py +0 -0
  67. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/cli/utils.py +0 -0
  68. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/client/__init__.py +0 -0
  69. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/client/async_.py +0 -0
  70. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/client/base.py +0 -0
  71. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/client/sync.py +0 -0
  72. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/exceptions.py +0 -0
  73. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/_macos.py +0 -0
  74. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/_windows.py +0 -0
  75. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/_ydotool.py +0 -0
  76. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/capslock_manager.py +0 -0
  77. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/handler/utils.py +0 -0
  78. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/logging.py +0 -0
  79. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/platform_info.py +0 -0
  80. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/__init__.py +0 -0
  81. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/agent_wrappers.py +0 -0
  82. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/config.py +0 -0
  83. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/main.py +0 -0
  84. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/models.py +0 -0
  85. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/session_store.py +0 -0
  86. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/server/socketio_server.py +0 -0
  87. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/task/__init__.py +0 -0
  88. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/__init__.py +0 -0
  89. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/action_handler.py +0 -0
  90. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/async_action_handler.py +0 -0
  91. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/async_image_provider.py +0 -0
  92. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/image.py +0 -0
  93. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/image_provider.py +0 -0
  94. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/models/__init__.py +0 -0
  95. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/models/action.py +0 -0
  96. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/models/client.py +0 -0
  97. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/models/image_config.py +0 -0
  98. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/models/step.py +0 -0
  99. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/step_observer.py +0 -0
  100. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/types/url.py +0 -0
  101. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/utils/__init__.py +0 -0
  102. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/utils/output_parser.py +0 -0
  103. {oagi_core-0.13.2 → oagi_core-0.14.0}/src/oagi/utils/prompt_builder.py +0 -0
  104. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/__init__.py +0 -0
  105. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/conftest.py +0 -0
  106. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_action_parsing.py +0 -0
  107. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_actor.py +0 -0
  108. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_agent/test_agent_wrappers.py +0 -0
  109. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_agent/test_default_agent.py +0 -0
  110. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_agent_registry.py +0 -0
  111. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_async_actor.py +0 -0
  112. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_async_client.py +0 -0
  113. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_async_handlers.py +0 -0
  114. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_cli.py +0 -0
  115. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_logging.py +0 -0
  116. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_mac_double_click.py +0 -0
  117. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_observer.py +0 -0
  118. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_pil_image.py +0 -0
  119. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_planner.py +0 -0
  120. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_planner_memory.py +0 -0
  121. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_pyautogui_action_handler.py +0 -0
  122. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_screenshot_maker.py +0 -0
  123. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_server/__init__.py +0 -0
  124. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_server/test_config.py +0 -0
  125. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_server/test_session_store.py +0 -0
  126. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_server/test_socketio_integration.py +0 -0
  127. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_sync_client.py +0 -0
  128. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_taskee_agent.py +0 -0
  129. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/test_tasker_agent.py +0 -0
  130. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/utils/__init__.py +0 -0
  131. {oagi_core-0.13.2 → oagi_core-0.14.0}/tests/utils/test_output_parser.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: oagi-core
3
- Version: 0.13.2
3
+ Version: 0.14.0
4
4
  Summary: Official API of OpenAGI Foundation
5
5
  Project-URL: Homepage, https://github.com/agiopen-org/oagi
6
6
  Author-email: OpenAGI Foundation <contact@agiopen.org>
@@ -31,6 +31,7 @@ Requires-Dist: openai>=1.3.0
31
31
  Requires-Dist: pydantic>=2.0.0
32
32
  Requires-Dist: rich>=10.0.0
33
33
  Provides-Extra: desktop
34
+ Requires-Dist: mss>=9.0.0; (sys_platform == 'win32') and extra == 'desktop'
34
35
  Requires-Dist: pillow>=9.0.0; extra == 'desktop'
35
36
  Requires-Dist: pyautogui>=0.9.54; extra == 'desktop'
36
37
  Requires-Dist: pyobjc-framework-applicationservices>=8.0; (sys_platform == 'darwin') and extra == 'desktop'
@@ -184,8 +185,14 @@ oagi agent modes
184
185
  # Check macOS permissions (screen recording & accessibility)
185
186
  oagi agent permission
186
187
 
188
+ # Print all available screens and their indices
189
+ oagi agent screens
190
+
187
191
  # Export execution history
188
192
  oagi agent run "Complete the form" --export html --export-file report.html
193
+
194
+ # Run with a specific screen
195
+ oagi agent run "Search weather on Google" --screen-index 1
189
196
  ```
190
197
 
191
198
  CLI options:
@@ -196,6 +203,7 @@ CLI options:
196
203
  - `--step-delay`: Delay after each action before next screenshot (default: 1.0s)
197
204
  - `--export`: Export format (markdown, html, json)
198
205
  - `--export-file`: Output file path for export
206
+ - `--screen-index`: Screen index for multi-screen environments
199
207
 
200
208
  ### Image Processing
201
209
 
@@ -280,11 +288,61 @@ config = YdotoolConfig(
280
288
  action_handler = AsyncYdotoolActionHandler(config=config)
281
289
  ```
282
290
 
291
+ ### Multi-Screen Execution
292
+ In multi-screen environments, you can choose which screen to use for task execution. The `ScreenManager` class provides methods to list the available screens, while the `AsyncPyautoguiActionHandler` and `AsyncScreenshotMaker` classes let you set the target screen for actions and screenshots. In the list returned by `get_all_screens`, the primary screen always comes first, and the remaining screens follow in ascending order of their origin coordinates.
293
+
294
+ ```python
295
+ import asyncio
296
+ import sys
297
+ from oagi import ScreenManager
298
+
299
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
300
+ if sys.platform == "win32":
301
+ ScreenManager.enable_windows_dpi_awareness()
302
+
303
+ from oagi import (
304
+ AsyncDefaultAgent,
305
+ AsyncPyautoguiActionHandler,
306
+ AsyncScreenshotMaker,
307
+ )
308
+
309
+ def print_all_screens():
310
+ """Print all available screens."""
311
+ screen_manager = ScreenManager()
312
+ all_screens = screen_manager.get_all_screens()
313
+ print("Available screens:")
314
+ for screen_index, screen in enumerate(all_screens):
315
+ print(f" - Index {screen_index}: {screen}")
316
+
317
+ async def main():
318
+ agent = AsyncDefaultAgent(max_steps=10)
319
+ action_handler = AsyncPyautoguiActionHandler()
320
+ image_provider = AsyncScreenshotMaker()
321
+ # Get all available screens
322
+ screen_manager = ScreenManager()
323
+ all_screens = screen_manager.get_all_screens()
324
+ # Choose a screen for task execution
325
+ screen_index = 1 # Use the second screen as example
326
+ target_screen = all_screens[screen_index]
327
+ # Set the target screen for handlers
328
+ action_handler.set_target_screen(target_screen)
329
+ image_provider.set_target_screen(target_screen)
330
+ completed = await agent.execute(
331
+ "Search weather on Google",
332
+ action_handler=action_handler,
333
+ image_provider=image_provider,
334
+ )
335
+ return completed
336
+
337
+ asyncio.run(main())
338
+ ```
339
+
283
340
  ## Examples
284
341
 
285
342
  See the [`examples/`](examples/) directory for more usage patterns:
286
343
  - `execute_task_auto.py` - Automated task execution with `AsyncDefaultAgent`
287
344
  - `execute_task_manual.py` - Manual step-by-step control with `Actor`
345
+ - `multi_screen_execution.py` - Automated task execution on multi-screen environments
288
346
  - `continued_session.py` - Continuing tasks across sessions
289
347
  - `screenshot_with_config.py` - Image compression and optimization
290
348
  - `socketio_server_basic.py` - Socket.IO server example
@@ -139,8 +139,14 @@ oagi agent modes
139
139
  # Check macOS permissions (screen recording & accessibility)
140
140
  oagi agent permission
141
141
 
142
+ # Print all available screens and their indices
143
+ oagi agent screens
144
+
142
145
  # Export execution history
143
146
  oagi agent run "Complete the form" --export html --export-file report.html
147
+
148
+ # Run with a specific screen
149
+ oagi agent run "Search weather on Google" --screen-index 1
144
150
  ```
145
151
 
146
152
  CLI options:
@@ -151,6 +157,7 @@ CLI options:
151
157
  - `--step-delay`: Delay after each action before next screenshot (default: 1.0s)
152
158
  - `--export`: Export format (markdown, html, json)
153
159
  - `--export-file`: Output file path for export
160
+ - `--screen-index`: Screen index for multi-screen environments
154
161
 
155
162
  ### Image Processing
156
163
 
@@ -235,11 +242,61 @@ config = YdotoolConfig(
235
242
  action_handler = AsyncYdotoolActionHandler(config=config)
236
243
  ```
237
244
 
245
+ ### Multi-Screen Execution
246
+ In multi-screen environments, you can choose which screen to use for task execution. The `ScreenManager` class provides methods to list the available screens, while the `AsyncPyautoguiActionHandler` and `AsyncScreenshotMaker` classes let you set the target screen for actions and screenshots. In the list returned by `get_all_screens`, the primary screen always comes first, and the remaining screens follow in ascending order of their origin coordinates.
247
+
248
+ ```python
249
+ import asyncio
250
+ import sys
251
+ from oagi import ScreenManager
252
+
253
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
254
+ if sys.platform == "win32":
255
+ ScreenManager.enable_windows_dpi_awareness()
256
+
257
+ from oagi import (
258
+ AsyncDefaultAgent,
259
+ AsyncPyautoguiActionHandler,
260
+ AsyncScreenshotMaker,
261
+ )
262
+
263
+ def print_all_screens():
264
+ """Print all available screens."""
265
+ screen_manager = ScreenManager()
266
+ all_screens = screen_manager.get_all_screens()
267
+ print("Available screens:")
268
+ for screen_index, screen in enumerate(all_screens):
269
+ print(f" - Index {screen_index}: {screen}")
270
+
271
+ async def main():
272
+ agent = AsyncDefaultAgent(max_steps=10)
273
+ action_handler = AsyncPyautoguiActionHandler()
274
+ image_provider = AsyncScreenshotMaker()
275
+ # Get all available screens
276
+ screen_manager = ScreenManager()
277
+ all_screens = screen_manager.get_all_screens()
278
+ # Choose a screen for task execution
279
+ screen_index = 1 # Use the second screen as example
280
+ target_screen = all_screens[screen_index]
281
+ # Set the target screen for handlers
282
+ action_handler.set_target_screen(target_screen)
283
+ image_provider.set_target_screen(target_screen)
284
+ completed = await agent.execute(
285
+ "Search weather on Google",
286
+ action_handler=action_handler,
287
+ image_provider=image_provider,
288
+ )
289
+ return completed
290
+
291
+ asyncio.run(main())
292
+ ```
293
+
238
294
  ## Examples
239
295
 
240
296
  See the [`examples/`](examples/) directory for more usage patterns:
241
297
  - `execute_task_auto.py` - Automated task execution with `AsyncDefaultAgent`
242
298
  - `execute_task_manual.py` - Manual step-by-step control with `Actor`
299
+ - `multi_screen_execution.py` - Automated task execution on multi-screen environments
243
300
  - `continued_session.py` - Continuing tasks across sessions
244
301
  - `screenshot_with_config.py` - Image compression and optimization
245
302
  - `socketio_server_basic.py` - Socket.IO server example
@@ -0,0 +1,74 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ import asyncio
9
+ import sys
10
+
11
+ from oagi import ScreenManager
12
+
13
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
14
+ if sys.platform == "win32":
15
+ ScreenManager.enable_windows_dpi_awareness()
16
+
17
+ from oagi import (
18
+ AsyncDefaultAgent,
19
+ AsyncPyautoguiActionHandler,
20
+ AsyncScreenshotMaker,
21
+ )
22
+
23
+
24
+ def print_all_screens():
25
+ """Print all available screens."""
26
+ screen_manager = ScreenManager()
27
+ all_screens = screen_manager.get_all_screens()
28
+ print("Available screens:")
29
+ for screen_index, screen in enumerate(all_screens):
30
+ print(f" - Index {screen_index}: {screen}")
31
+
32
+
33
+ def execute_task_on_specific_screen(task_desc, max_steps=5, screen_index=0):
34
+ """Synchronous wrapper for async task execution."""
35
+ # Print all screens and choose one screen for task execution
36
+ return asyncio.run(
37
+ async_execute_task_on_specific_screen(task_desc, max_steps, screen_index)
38
+ )
39
+
40
+
41
+ async def async_execute_task_on_specific_screen(task_desc, max_steps=5, screen_index=0):
42
+ # set OAGI_API_KEY and OAGI_BASE_URL
43
+ # or AsyncDefaultAgent(api_key="your_api_key", base_url="your_base_url")
44
+ agent = AsyncDefaultAgent(max_steps=max_steps)
45
+
46
+ # executor = lambda actions: print(actions) for debugging
47
+ action_handler = AsyncPyautoguiActionHandler()
48
+ image_provider = AsyncScreenshotMaker()
49
+
50
+ # Get the target screen info for task execution
51
+ screen_manager = ScreenManager()
52
+ all_screens = screen_manager.get_all_screens()
53
+ screen = all_screens[screen_index]
54
+ # Set the screen index for handlers
55
+ action_handler.set_target_screen(screen)
56
+ image_provider.set_target_screen(screen)
57
+
58
+ is_completed = await agent.execute(
59
+ task_desc,
60
+ action_handler=action_handler,
61
+ image_provider=image_provider,
62
+ )
63
+
64
+ return is_completed, await image_provider.last_image()
65
+
66
+
67
+ if __name__ == "__main__":
68
+ # Example task
69
+ task_desc = "Open Chrome and navigate to google.com"
70
+ screen_index = 1 # Use the second screen as example
71
+ success, image = execute_task_on_specific_screen(
72
+ task_desc, screen_index=screen_index
73
+ )
74
+ print(f"\nFinal result: {'Success' if success else 'Failed'}")
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi"
7
- version = "0.13.2"
7
+ version = "0.14.0"
8
8
  description = "Official API of OpenAGI Foundation (metapackage with all features)"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -16,7 +16,7 @@ authors = [
16
16
  requires-python = ">= 3.10"
17
17
 
18
18
  dependencies = [
19
- "oagi-core[desktop,server]==0.13.2",
19
+ "oagi-core[desktop,server]==0.14.0",
20
20
  ]
21
21
 
22
22
  [project.urls]
@@ -518,18 +518,18 @@ sdist = { url = "https://files.pythonhosted.org/packages/28/fa/b2ba8229b9381e8f6
518
518
 
519
519
  [[package]]
520
520
  name = "oagi"
521
- version = "0.13.2"
521
+ version = "0.14.0"
522
522
  source = { editable = "." }
523
523
  dependencies = [
524
524
  { name = "oagi-core", extra = ["desktop", "server"] },
525
525
  ]
526
526
 
527
527
  [package.metadata]
528
- requires-dist = [{ name = "oagi-core", extras = ["desktop", "server"], specifier = "==0.13.1" }]
528
+ requires-dist = [{ name = "oagi-core", extras = ["desktop", "server"], specifier = "==0.13.2" }]
529
529
 
530
530
  [[package]]
531
531
  name = "oagi-core"
532
- version = "0.13.1"
532
+ version = "0.13.2"
533
533
  source = { registry = "https://pypi.org/simple" }
534
534
  dependencies = [
535
535
  { name = "httpx" },
@@ -537,9 +537,9 @@ dependencies = [
537
537
  { name = "pydantic" },
538
538
  { name = "rich" },
539
539
  ]
540
- sdist = { url = "https://files.pythonhosted.org/packages/62/b0/6006c57afc693a4afdc5389f1c93e83654745ec2b25d33c091621aec7d15/oagi_core-0.13.1.tar.gz", hash = "sha256:8dcbf45c6eb7203573d04122ebb53c67d7c955bb6bbfe8332fe2e641e7cb689f", size = 304385, upload-time = "2026-01-12T04:14:18.758Z" }
540
+ sdist = { url = "https://files.pythonhosted.org/packages/32/64/cf3f690deafbbacd17333e307f4fa6fc9daa68c69de0e426db3684c9e25b/oagi_core-0.13.2.tar.gz", hash = "sha256:f5b4957bd383ed312753b36476680e99adc42a0ff9d912054cf54021d8b132fe", size = 304580, upload-time = "2026-01-16T02:17:23.505Z" }
541
541
  wheels = [
542
- { url = "https://files.pythonhosted.org/packages/6e/78/0a04d5a28313ab6dc783a5c4d8b32c1871fa2a140f75dfe42f56024c8865/oagi_core-0.13.1-py3-none-any.whl", hash = "sha256:b29637ca080fa7092224231b113b35c07f09657bed203e8e26e13e2c2a65665f", size = 110049, upload-time = "2026-01-12T04:14:17.645Z" },
542
+ { url = "https://files.pythonhosted.org/packages/5b/12/8d2d0240176cfb275282c735fa57ea7620ba3f8a40ba73a2b151c1c6c3b7/oagi_core-0.13.2-py3-none-any.whl", hash = "sha256:4251a9d4f056b98d653a8e2fce3de8d19f5836130275161ec8b4733646a8933d", size = 110155, upload-time = "2026-01-16T02:17:22.055Z" },
543
543
  ]
544
544
 
545
545
  [package.optional-dependencies]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi-core"
7
- version = "0.13.2"
7
+ version = "0.14.0"
8
8
  description = "Official API of OpenAGI Foundation"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -33,7 +33,8 @@ desktop = [
33
33
  "pyautogui>=0.9.54",
34
34
  "pyobjc-framework-Quartz>=8.0; sys_platform == 'darwin'",
35
35
  "pyobjc-framework-ApplicationServices>=8.0; sys_platform == 'darwin'",
36
- "screeninfo>=0.8.1"
36
+ "screeninfo>=0.8.1",
37
+ "mss>=9.0.0; sys_platform == 'win32'",
37
38
  ]
38
39
  server = [
39
40
  "fastapi[standard]>=0.100.0",
@@ -81,6 +81,7 @@ _LAZY_IMPORTS_DATA: dict[str, tuple[str, str | None, str | None]] = {
81
81
  "screeninfo",
82
82
  "desktop",
83
83
  ),
84
+ "ScreenManager": ("oagi.handler.screen_manager", None, None),
84
85
  }
85
86
 
86
87
  if TYPE_CHECKING:
@@ -95,6 +96,7 @@ if TYPE_CHECKING:
95
96
  PyautoguiActionHandler,
96
97
  PyautoguiConfig,
97
98
  )
99
+ from oagi.handler.screen_manager import ScreenManager
98
100
  from oagi.handler.screenshot_maker import ScreenshotMaker
99
101
  from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
100
102
  from oagi.server.config import ServerConfig
@@ -170,4 +172,6 @@ __all__ = [
170
172
  "AsyncYdotoolActionHandler",
171
173
  "YdotoolActionHandler",
172
174
  "YdotoolConfig",
175
+ # Lazy imports - Screen manager
176
+ "ScreenManager",
173
177
  ]
@@ -86,6 +86,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
86
86
  type=float,
87
87
  help=f"Delay in seconds after each step before next screenshot (default: {DEFAULT_STEP_DELAY})",
88
88
  )
89
+ run_parser.add_argument(
90
+ "--screen-index",
91
+ type=int,
92
+ help="Choose the index of screen to run the task",
93
+ )
89
94
 
90
95
  # agent modes command
91
96
  agent_subparsers.add_parser("modes", help="List available agent modes")
@@ -96,6 +101,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
96
101
  help="Check macOS permissions for screen recording and accessibility",
97
102
  )
98
103
 
104
+ # agent screens command
105
+ agent_subparsers.add_parser(
106
+ "screens", help="List all available screens for agent execution"
107
+ )
108
+
99
109
 
100
110
  def handle_agent_command(args: argparse.Namespace) -> None:
101
111
  if args.agent_command == "run":
@@ -104,6 +114,19 @@ def handle_agent_command(args: argparse.Namespace) -> None:
104
114
  list_modes()
105
115
  elif args.agent_command == "permission":
106
116
  check_permissions()
117
+ elif args.agent_command == "screens":
118
+ list_screens()
119
+
120
+
121
+ def list_screens() -> None:
122
+ """List all available screens for agent execution."""
123
+ from oagi import ScreenManager # noqa: PLC0415
124
+
125
+ screen_manager = ScreenManager()
126
+ screens = screen_manager.get_all_screens()
127
+ print("Available screens:")
128
+ for screen_index, screen in enumerate(screens):
129
+ print(f" - Index {screen_index}: {screen}")
107
130
 
108
131
 
109
132
  def list_modes() -> None:
@@ -212,6 +235,22 @@ def run_agent(args: argparse.Namespace) -> None:
212
235
  from oagi.agent import create_agent # noqa: PLC0415
213
236
  from oagi.handler.wayland_support import is_wayland_display_server # noqa: PLC0415
214
237
 
238
+ # Create screen manager for multi-screen support
239
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
240
+ target_screen = None
241
+ if args.screen_index is not None:
242
+ from oagi.handler import ScreenManager # noqa: PLC0415
243
+
244
+ screen_index = args.screen_index
245
+ screen_manager = ScreenManager()
246
+ all_screens = screen_manager.get_all_screens()
247
+ if screen_index >= len(all_screens) or screen_index < 0:
248
+ raise ValueError(
249
+ f"Error: Screen index {screen_index} not found. Available screen indices: {list(range(len(all_screens)))}"
250
+ )
251
+ target_screen = all_screens[screen_index]
252
+ print(f"Target screen: {target_screen}")
253
+
215
254
  # Select appropriate action handler based on display server
216
255
  if is_wayland_display_server():
217
256
  check_optional_dependency("screeninfo", "Agent execution (Wayland)", "desktop")
@@ -280,6 +319,11 @@ def run_agent(args: argparse.Namespace) -> None:
280
319
  # Create image provider
281
320
  image_provider = AsyncScreenshotMaker()
282
321
 
322
+ if target_screen:
323
+ # Set the target screen for the image and action provider
324
+ image_provider.set_target_screen(target_screen)
325
+ action_handler.set_target_screen(target_screen)
326
+
283
327
  if args.instruction:
284
328
  print(f"Starting agent with instruction: {args.instruction}")
285
329
  else:
@@ -35,7 +35,7 @@ DEFAULT_REFLECTION_INTERVAL = 4
35
35
  DEFAULT_REFLECTION_INTERVAL_TASKER = 20
36
36
 
37
37
  # Timing & Delays
38
- DEFAULT_STEP_DELAY = 0.3
38
+ DEFAULT_STEP_DELAY = 1.0
39
39
 
40
40
  # Temperature Defaults
41
41
  DEFAULT_TEMPERATURE = 0.5
@@ -21,6 +21,7 @@ _LAZY_IMPORTS: dict[str, str] = {
21
21
  "AsyncYdotoolActionHandler": "oagi.handler.async_ydotool_action_handler",
22
22
  "YdotoolActionHandler": "oagi.handler.ydotool_action_handler",
23
23
  "YdotoolConfig": "oagi.handler.ydotool_action_handler",
24
+ "ScreenManager": "oagi.handler.screen_manager",
24
25
  }
25
26
 
26
27
  if TYPE_CHECKING:
@@ -32,6 +33,7 @@ if TYPE_CHECKING:
32
33
  PyautoguiActionHandler,
33
34
  PyautoguiConfig,
34
35
  )
36
+ from oagi.handler.screen_manager import ScreenManager
35
37
  from oagi.handler.screenshot_maker import ScreenshotMaker
36
38
  from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
37
39
 
@@ -60,4 +62,5 @@ __all__ = [
60
62
  "YdotoolConfig",
61
63
  "YdotoolActionHandler",
62
64
  "AsyncYdotoolActionHandler",
65
+ "ScreenManager",
63
66
  ]
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Action
12
14
  from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
13
15
 
@@ -29,6 +31,14 @@ class AsyncPyautoguiActionHandler:
29
31
  self.sync_handler = PyautoguiActionHandler(config=config)
30
32
  self.config = config or PyautoguiConfig()
31
33
 
34
+ def set_target_screen(self, screen: Screen) -> None:
35
+ """Set the target screen for the action handler.
36
+
37
+ Args:
38
+ screen (Screen): The screen object to set as the target.
39
+ """
40
+ self.sync_handler.set_target_screen(screen)
41
+
32
42
  def reset(self):
33
43
  """Reset handler state.
34
44
 
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Image, ImageConfig
12
14
  from .screenshot_maker import ScreenshotMaker
13
15
 
@@ -29,6 +31,9 @@ class AsyncScreenshotMaker:
29
31
  self.sync_screenshot_maker = ScreenshotMaker(config=config)
30
32
  self.config = config
31
33
 
34
+ def set_target_screen(self, screen: Screen) -> None:
35
+ self.sync_screenshot_maker.set_target_screen(screen)
36
+
32
37
  async def __call__(self) -> Image:
33
38
  """
34
39
  Capture a screenshot asynchronously using a thread pool executor.
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Action
12
14
  from .ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
13
15
 
@@ -29,6 +31,14 @@ class AsyncYdotoolActionHandler:
29
31
  self.config = config or YdotoolConfig()
30
32
  self.sync_handler = YdotoolActionHandler(config=self.config)
31
33
 
34
+ def set_target_screen(self, screen: Screen) -> None:
35
+ """Set the target screen for the action handler.
36
+
37
+ Args:
38
+ screen (Screen): The screen object to set as the target.
39
+ """
40
+ self.sync_handler.set_target_screen(screen)
41
+
32
42
  def reset(self):
33
43
  """Reset handler state.
34
44
 
@@ -7,6 +7,7 @@
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
9
  import io
10
+ import sys
10
11
 
11
12
  from ..exceptions import check_optional_dependency
12
13
  from ..types.models.image_config import ImageConfig
@@ -39,17 +40,47 @@ class PILImage:
39
40
  return cls(image, config)
40
41
 
41
42
  @classmethod
42
- def from_screenshot(cls, config: ImageConfig | None = None) -> "PILImage":
43
- """Create PILImage from screenshot."""
43
+ def from_screenshot(
44
+ cls,
45
+ config: ImageConfig | None = None,
46
+ region: tuple[int, int, int, int] | None = None,
47
+ ) -> "PILImage":
48
+ """Create PILImage from screenshot.
49
+
50
+ Args:
51
+ config: ImageConfig for transformations
52
+ region: Optional (x, y, width, height) tuple for cropping
53
+ """
44
54
  # Use flameshot by default in Wayland display environment
45
55
  if is_wayland_display_server():
46
- return cls(wayland_screenshot(), config)
56
+ return cls(wayland_screenshot(region=region), config)
47
57
 
48
58
  # Lazy import to avoid DISPLAY issues in headless environments
49
59
  check_optional_dependency("pyautogui", "PILImage.from_screenshot()", "desktop")
50
60
  import pyautogui # noqa: PLC0415
51
61
 
52
- screenshot = pyautogui.screenshot()
62
+ if sys.platform == "win32" and region is not None:
63
+ # Use mss instead of pyautogui for screenshots in multi-monitor Windows setups
64
+ import mss # noqa: PLC0415
65
+
66
+ with mss.mss() as sct:
67
+ screenshot_data = sct.grab(
68
+ {
69
+ "top": region[1],
70
+ "left": region[0],
71
+ "width": region[2],
72
+ "height": region[3],
73
+ }
74
+ )
75
+ screenshot = PILImageLib.frombytes(
76
+ "RGB",
77
+ screenshot_data.size,
78
+ screenshot_data.bgra,
79
+ "raw",
80
+ "BGRX",
81
+ )
82
+ else:
83
+ screenshot = pyautogui.screenshot(region=region)
53
84
  return cls(screenshot, config)
54
85
 
55
86
  def transform(self, config: ImageConfig) -> "PILImage":
@@ -11,6 +11,8 @@ import time
11
11
 
12
12
  from pydantic import BaseModel, Field
13
13
 
14
+ from oagi.handler.screen_manager import Screen
15
+
14
16
  from ..exceptions import check_optional_dependency
15
17
  from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
16
18
  from .capslock_manager import CapsLockManager
@@ -81,6 +83,8 @@ class PyautoguiActionHandler:
81
83
  pyautogui.PAUSE = self.config.action_pause
82
84
  # Initialize caps lock manager
83
85
  self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
86
+ # The origin position of coordinates (the top-left corner of the target screen)
87
+ self.origin_x, self.origin_y = 0, 0
84
88
 
85
89
  def reset(self):
86
90
  """Reset handler state.
@@ -90,6 +94,15 @@ class PyautoguiActionHandler:
90
94
  """
91
95
  self.caps_manager.reset()
92
96
 
97
+ def set_target_screen(self, screen: Screen) -> None:
98
+ """Set the target screen for the action handler.
99
+
100
+ Args:
101
+ screen (Screen): The screen object to set as the target.
102
+ """
103
+ self.screen_width, self.screen_height = screen.width, screen.height
104
+ self.origin_x, self.origin_y = screen.x, screen.y
105
+
93
106
  def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
94
107
  """Convert coordinates from 0-1000 range to actual screen coordinates.
95
108
 
@@ -111,6 +124,10 @@ class PyautoguiActionHandler:
111
124
  elif screen_y > self.screen_height - 1:
112
125
  screen_y = self.screen_height - 1
113
126
 
127
+ # Add origin offset to convert relative to top-left corner
128
+ screen_x += self.origin_x
129
+ screen_y += self.origin_y
130
+
114
131
  return screen_x, screen_y
115
132
 
116
133
  def _parse_coords(self, args_str: str) -> tuple[int, int]: