oagi-core 0.13.2__tar.gz → 0.14.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. {oagi_core-0.13.2 → oagi_core-0.14.1}/PKG-INFO +72 -10
  2. {oagi_core-0.13.2 → oagi_core-0.14.1}/README.md +70 -9
  3. oagi_core-0.14.1/examples/multi_screen_execution.py +74 -0
  4. {oagi_core-0.13.2 → oagi_core-0.14.1}/metapackage/pyproject.toml +2 -2
  5. {oagi_core-0.13.2 → oagi_core-0.14.1}/metapackage/uv.lock +15 -5
  6. {oagi_core-0.13.2 → oagi_core-0.14.1}/pyproject.toml +3 -2
  7. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/__init__.py +4 -0
  8. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/default.py +4 -6
  9. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/tasker/taskee_agent.py +4 -6
  10. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/agent.py +44 -0
  11. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/constants.py +1 -1
  12. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/__init__.py +3 -0
  13. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/async_pyautogui_action_handler.py +12 -1
  14. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/async_screenshot_maker.py +5 -0
  15. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/async_ydotool_action_handler.py +10 -0
  16. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/pil_image.py +35 -4
  17. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/pyautogui_action_handler.py +28 -0
  18. oagi_core-0.14.1/src/oagi/handler/screen_manager.py +187 -0
  19. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/screenshot_maker.py +8 -1
  20. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/utils.py +14 -0
  21. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/wayland_support.py +6 -2
  22. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/ydotool_action_handler.py +28 -0
  23. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_mac_double_click.py +6 -3
  24. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_pyautogui_action_handler.py +47 -19
  25. {oagi_core-0.13.2 → oagi_core-0.14.1}/uv.lock +12 -1
  26. {oagi_core-0.13.2 → oagi_core-0.14.1}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  27. {oagi_core-0.13.2 → oagi_core-0.14.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  28. {oagi_core-0.13.2 → oagi_core-0.14.1}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
  29. {oagi_core-0.13.2 → oagi_core-0.14.1}/.github/ISSUE_TEMPLATE/question.yml +0 -0
  30. {oagi_core-0.13.2 → oagi_core-0.14.1}/.github/workflows/ci.yml +0 -0
  31. {oagi_core-0.13.2 → oagi_core-0.14.1}/.github/workflows/release.yml +0 -0
  32. {oagi_core-0.13.2 → oagi_core-0.14.1}/.gitignore +0 -0
  33. {oagi_core-0.13.2 → oagi_core-0.14.1}/.python-version +0 -0
  34. {oagi_core-0.13.2 → oagi_core-0.14.1}/CONTRIBUTING.md +0 -0
  35. {oagi_core-0.13.2 → oagi_core-0.14.1}/LICENSE +0 -0
  36. {oagi_core-0.13.2 → oagi_core-0.14.1}/Makefile +0 -0
  37. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/async_google_weather.py +0 -0
  38. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/execute_task_auto.py +0 -0
  39. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/execute_task_manual.py +0 -0
  40. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/google_weather.py +0 -0
  41. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/openai_agent_loop_example.py +0 -0
  42. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/screenshot_with_config.py +0 -0
  43. {oagi_core-0.13.2 → oagi_core-0.14.1}/examples/tasker_agent_example.py +0 -0
  44. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/actor/__init__.py +0 -0
  45. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/actor/async_.py +0 -0
  46. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/actor/async_short.py +0 -0
  47. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/actor/base.py +0 -0
  48. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/actor/short.py +0 -0
  49. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/actor/sync.py +0 -0
  50. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/__init__.py +0 -0
  51. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/factories.py +0 -0
  52. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/observer/__init__.py +0 -0
  53. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/observer/agent_observer.py +0 -0
  54. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/observer/events.py +0 -0
  55. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/observer/exporters.py +0 -0
  56. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/observer/protocol.py +0 -0
  57. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/observer/report_template.html +0 -0
  58. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/protocol.py +0 -0
  59. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/registry.py +0 -0
  60. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/tasker/__init__.py +0 -0
  61. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/tasker/memory.py +0 -0
  62. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/tasker/models.py +0 -0
  63. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/tasker/planner.py +0 -0
  64. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/agent/tasker/tasker_agent.py +0 -0
  65. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/__init__.py +0 -0
  66. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/display.py +0 -0
  67. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/main.py +0 -0
  68. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/server.py +0 -0
  69. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/tracking.py +0 -0
  70. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/cli/utils.py +0 -0
  71. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/client/__init__.py +0 -0
  72. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/client/async_.py +0 -0
  73. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/client/base.py +0 -0
  74. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/client/sync.py +0 -0
  75. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/exceptions.py +0 -0
  76. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/_macos.py +0 -0
  77. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/_windows.py +0 -0
  78. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/_ydotool.py +0 -0
  79. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/handler/capslock_manager.py +0 -0
  80. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/logging.py +0 -0
  81. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/platform_info.py +0 -0
  82. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/__init__.py +0 -0
  83. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/agent_wrappers.py +0 -0
  84. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/config.py +0 -0
  85. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/main.py +0 -0
  86. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/models.py +0 -0
  87. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/session_store.py +0 -0
  88. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/server/socketio_server.py +0 -0
  89. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/task/__init__.py +0 -0
  90. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/__init__.py +0 -0
  91. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/action_handler.py +0 -0
  92. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/async_action_handler.py +0 -0
  93. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/async_image_provider.py +0 -0
  94. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/image.py +0 -0
  95. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/image_provider.py +0 -0
  96. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/models/__init__.py +0 -0
  97. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/models/action.py +0 -0
  98. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/models/client.py +0 -0
  99. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/models/image_config.py +0 -0
  100. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/models/step.py +0 -0
  101. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/step_observer.py +0 -0
  102. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/types/url.py +0 -0
  103. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/utils/__init__.py +0 -0
  104. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/utils/output_parser.py +0 -0
  105. {oagi_core-0.13.2 → oagi_core-0.14.1}/src/oagi/utils/prompt_builder.py +0 -0
  106. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/__init__.py +0 -0
  107. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/conftest.py +0 -0
  108. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_action_parsing.py +0 -0
  109. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_actor.py +0 -0
  110. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_agent/test_agent_wrappers.py +0 -0
  111. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_agent/test_default_agent.py +0 -0
  112. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_agent_registry.py +0 -0
  113. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_async_actor.py +0 -0
  114. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_async_client.py +0 -0
  115. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_async_handlers.py +0 -0
  116. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_cli.py +0 -0
  117. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_logging.py +0 -0
  118. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_observer.py +0 -0
  119. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_pil_image.py +0 -0
  120. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_planner.py +0 -0
  121. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_planner_memory.py +0 -0
  122. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_screenshot_maker.py +0 -0
  123. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_server/__init__.py +0 -0
  124. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_server/test_config.py +0 -0
  125. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_server/test_session_store.py +0 -0
  126. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_server/test_socketio_integration.py +0 -0
  127. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_sync_client.py +0 -0
  128. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_taskee_agent.py +0 -0
  129. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/test_tasker_agent.py +0 -0
  130. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/utils/__init__.py +0 -0
  131. {oagi_core-0.13.2 → oagi_core-0.14.1}/tests/utils/test_output_parser.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: oagi-core
3
- Version: 0.13.2
3
+ Version: 0.14.1
4
4
  Summary: Official API of OpenAGI Foundation
5
5
  Project-URL: Homepage, https://github.com/agiopen-org/oagi
6
6
  Author-email: OpenAGI Foundation <contact@agiopen.org>
@@ -31,6 +31,7 @@ Requires-Dist: openai>=1.3.0
31
31
  Requires-Dist: pydantic>=2.0.0
32
32
  Requires-Dist: rich>=10.0.0
33
33
  Provides-Extra: desktop
34
+ Requires-Dist: mss>=9.0.0; (sys_platform == 'win32') and extra == 'desktop'
34
35
  Requires-Dist: pillow>=9.0.0; extra == 'desktop'
35
36
  Requires-Dist: pyautogui>=0.9.54; extra == 'desktop'
36
37
  Requires-Dist: pyobjc-framework-applicationservices>=8.0; (sys_platform == 'darwin') and extra == 'desktop'
@@ -153,12 +154,15 @@ from oagi import AsyncPyautoguiActionHandler, PyautoguiConfig
153
154
 
154
155
  # Customize action behavior
155
156
  config = PyautoguiConfig(
156
- drag_duration=1.0, # Slower drags for precision (default: 0.5)
157
- scroll_amount=50, # Larger scroll steps (default: 30)
158
- wait_duration=2.0, # Longer waits (default: 1.0)
159
- action_pause=0.2, # More pause between actions (default: 0.1)
160
- hotkey_interval=0.1, # Interval between keys in hotkey combinations (default: 0.1)
161
- capslock_mode="session" # Caps lock mode: 'session' or 'system' (default: 'session')
157
+ drag_duration=1.0, # Slower drags for precision (default: 0.5)
158
+ scroll_amount=50, # Larger scroll steps (default: 2 on macOS, 100 on others)
159
+ wait_duration=2.0, # Longer waits for WAIT action (default: 1.0)
160
+ action_pause=0.2, # Pause between PyAutoGUI calls (default: 0.1)
161
+ hotkey_interval=0.1, # Interval between keys in hotkey combos (default: 0.1)
162
+ capslock_mode="session", # Caps lock mode: 'session' or 'system' (default: 'session')
163
+ macos_ctrl_to_cmd=True, # Replace ctrl with cmd on macOS (default: True)
164
+ click_pre_delay=0.1, # Delay after move before click (default: 0.1)
165
+ post_batch_delay=1.0, # Delay after actions before next screenshot (default: 1.0)
162
166
  )
163
167
 
164
168
  action_handler = AsyncPyautoguiActionHandler(config=config)
@@ -184,8 +188,14 @@ oagi agent modes
184
188
  # Check macOS permissions (screen recording & accessibility)
185
189
  oagi agent permission
186
190
 
191
+ # Print all available screens and their indices
192
+ oagi agent screens
193
+
187
194
  # Export execution history
188
195
  oagi agent run "Complete the form" --export html --export-file report.html
196
+
197
+ # Run with a specific screen
198
+ oagi agent run "Search weather on Google" --screen-index 1
189
199
  ```
190
200
 
191
201
  CLI options:
@@ -196,6 +206,7 @@ CLI options:
196
206
  - `--step-delay`: Delay after each action before next screenshot (default: 0.3s)
197
207
  - `--export`: Export format (markdown, html, json)
198
208
  - `--export-file`: Output file path for export
209
+ - `--screen-index`: Screen index for multi-screen environments
199
210
 
200
211
  ### Image Processing
201
212
 
@@ -271,20 +282,71 @@ from oagi import AsyncYdotoolActionHandler, YdotoolConfig
271
282
  # Customize action behavior
272
283
  config = YdotoolConfig(
273
284
  scroll_amount=50, # Larger scroll steps (default: 20)
274
- wait_duration=2.0, # Longer waits (default: 1.0)
275
- action_pause=1.0, # More pause between actions (default: 0.5)
285
+ wait_duration=2.0, # Longer waits for WAIT action (default: 1.0)
286
+ action_pause=1.0, # Pause between Ydotool calls (default: 0.5)
276
287
  capslock_mode="session", # Caps lock mode: 'session' or 'system' (default: 'session')
277
- socket_address="/tmp/ydotool.sock" # Customized Socket address for ydotool (ydotool uses 'YDOTOOL_SOCKET' environment variable by default)
288
+ socket_address="/tmp/ydotool.sock", # Custom socket address (default: YDOTOOL_SOCKET env var)
289
+ post_batch_delay=1.0, # Delay after actions before next screenshot (default: 1.0)
278
290
  )
279
291
 
280
292
  action_handler = AsyncYdotoolActionHandler(config=config)
281
293
  ```
282
294
 
295
+ ### Multi-Screen Execution
296
+ When running in a multi-screen environment, you can choose which screen to use for task execution. The `ScreenManager` class provides methods to list the available screens, while the `AsyncPyautoguiActionHandler` and `AsyncScreenshotMaker` classes let you set the target screen for actions and screenshots. In the list returned by `get_all_screens`, the primary screen always comes first, and the remaining screens follow in ascending order of their origin coordinates.
297
+
298
+ ```python
299
+ import asyncio
300
+ import sys
301
+ from oagi import ScreenManager
302
+
303
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
304
+ if sys.platform == "win32":
305
+ ScreenManager.enable_windows_dpi_awareness()
306
+
307
+ from oagi import (
308
+ AsyncDefaultAgent,
309
+ AsyncPyautoguiActionHandler,
310
+ AsyncScreenshotMaker,
311
+ )
312
+
313
+ def print_all_screens():
314
+ """Print all available screens."""
315
+ screen_manager = ScreenManager()
316
+ all_screens = screen_manager.get_all_screens()
317
+ print("Available screens:")
318
+ for screen_index, screen in enumerate(all_screens):
319
+ print(f" - Index {screen_index}: {screen}")
320
+
321
+ async def main():
322
+ agent = AsyncDefaultAgent(max_steps=10)
323
+ action_handler = AsyncPyautoguiActionHandler()
324
+ image_provider = AsyncScreenshotMaker()
325
+ # Get all available screens
326
+ screen_manager = ScreenManager()
327
+ all_screens = screen_manager.get_all_screens()
328
+ # Choose a screen for task execution
329
+ screen_index = 1 # Use the second screen as example
330
+ target_screen = all_screens[screen_index]
331
+ # Set the target screen for handlers
332
+ action_handler.set_target_screen(target_screen)
333
+ image_provider.set_target_screen(target_screen)
334
+ completed = await agent.execute(
335
+ "Search weather on Google",
336
+ action_handler=action_handler,
337
+ image_provider=image_provider,
338
+ )
339
+ return completed
340
+
341
+ asyncio.run(main())
342
+ ```
343
+
283
344
  ## Examples
284
345
 
285
346
  See the [`examples/`](examples/) directory for more usage patterns:
286
347
  - `execute_task_auto.py` - Automated task execution with `AsyncDefaultAgent`
287
348
  - `execute_task_manual.py` - Manual step-by-step control with `Actor`
349
+ - `multi_screen_execution.py` - Automated task execution on multi-screen environments
288
350
  - `continued_session.py` - Continuing tasks across sessions
289
351
  - `screenshot_with_config.py` - Image compression and optimization
290
352
  - `socketio_server_basic.py` - Socket.IO server example
@@ -108,12 +108,15 @@ from oagi import AsyncPyautoguiActionHandler, PyautoguiConfig
108
108
 
109
109
  # Customize action behavior
110
110
  config = PyautoguiConfig(
111
- drag_duration=1.0, # Slower drags for precision (default: 0.5)
112
- scroll_amount=50, # Larger scroll steps (default: 30)
113
- wait_duration=2.0, # Longer waits (default: 1.0)
114
- action_pause=0.2, # More pause between actions (default: 0.1)
115
- hotkey_interval=0.1, # Interval between keys in hotkey combinations (default: 0.1)
116
- capslock_mode="session" # Caps lock mode: 'session' or 'system' (default: 'session')
111
+ drag_duration=1.0, # Slower drags for precision (default: 0.5)
112
+ scroll_amount=50, # Larger scroll steps (default: 2 on macOS, 100 on others)
113
+ wait_duration=2.0, # Longer waits for WAIT action (default: 1.0)
114
+ action_pause=0.2, # Pause between PyAutoGUI calls (default: 0.1)
115
+ hotkey_interval=0.1, # Interval between keys in hotkey combos (default: 0.1)
116
+ capslock_mode="session", # Caps lock mode: 'session' or 'system' (default: 'session')
117
+ macos_ctrl_to_cmd=True, # Replace ctrl with cmd on macOS (default: True)
118
+ click_pre_delay=0.1, # Delay after move before click (default: 0.1)
119
+ post_batch_delay=1.0, # Delay after actions before next screenshot (default: 1.0)
117
120
  )
118
121
 
119
122
  action_handler = AsyncPyautoguiActionHandler(config=config)
@@ -139,8 +142,14 @@ oagi agent modes
139
142
  # Check macOS permissions (screen recording & accessibility)
140
143
  oagi agent permission
141
144
 
145
+ # Print all available screens and their indices
146
+ oagi agent screens
147
+
142
148
  # Export execution history
143
149
  oagi agent run "Complete the form" --export html --export-file report.html
150
+
151
+ # Run with a specific screen
152
+ oagi agent run "Search weather on Google" --screen-index 1
144
153
  ```
145
154
 
146
155
  CLI options:
@@ -151,6 +160,7 @@ CLI options:
151
160
  - `--step-delay`: Delay after each action before next screenshot (default: 0.3s)
152
161
  - `--export`: Export format (markdown, html, json)
153
162
  - `--export-file`: Output file path for export
163
+ - `--screen-index`: Screen index for multi-screen environments
154
164
 
155
165
  ### Image Processing
156
166
 
@@ -226,20 +236,71 @@ from oagi import AsyncYdotoolActionHandler, YdotoolConfig
226
236
  # Customize action behavior
227
237
  config = YdotoolConfig(
228
238
  scroll_amount=50, # Larger scroll steps (default: 20)
229
- wait_duration=2.0, # Longer waits (default: 1.0)
230
- action_pause=1.0, # More pause between actions (default: 0.5)
239
+ wait_duration=2.0, # Longer waits for WAIT action (default: 1.0)
240
+ action_pause=1.0, # Pause between Ydotool calls (default: 0.5)
231
241
  capslock_mode="session", # Caps lock mode: 'session' or 'system' (default: 'session')
232
- socket_address="/tmp/ydotool.sock" # Customized Socket address for ydotool (ydotool uses 'YDOTOOL_SOCKET' environment variable by default)
242
+ socket_address="/tmp/ydotool.sock", # Custom socket address (default: YDOTOOL_SOCKET env var)
243
+ post_batch_delay=1.0, # Delay after actions before next screenshot (default: 1.0)
233
244
  )
234
245
 
235
246
  action_handler = AsyncYdotoolActionHandler(config=config)
236
247
  ```
237
248
 
249
+ ### Multi-Screen Execution
250
+ When running in a multi-screen environment, you can choose which screen to use for task execution. The `ScreenManager` class provides methods to list the available screens, while the `AsyncPyautoguiActionHandler` and `AsyncScreenshotMaker` classes let you set the target screen for actions and screenshots. In the list returned by `get_all_screens`, the primary screen always comes first, and the remaining screens follow in ascending order of their origin coordinates.
251
+
252
+ ```python
253
+ import asyncio
254
+ import sys
255
+ from oagi import ScreenManager
256
+
257
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
258
+ if sys.platform == "win32":
259
+ ScreenManager.enable_windows_dpi_awareness()
260
+
261
+ from oagi import (
262
+ AsyncDefaultAgent,
263
+ AsyncPyautoguiActionHandler,
264
+ AsyncScreenshotMaker,
265
+ )
266
+
267
+ def print_all_screens():
268
+ """Print all available screens."""
269
+ screen_manager = ScreenManager()
270
+ all_screens = screen_manager.get_all_screens()
271
+ print("Available screens:")
272
+ for screen_index, screen in enumerate(all_screens):
273
+ print(f" - Index {screen_index}: {screen}")
274
+
275
+ async def main():
276
+ agent = AsyncDefaultAgent(max_steps=10)
277
+ action_handler = AsyncPyautoguiActionHandler()
278
+ image_provider = AsyncScreenshotMaker()
279
+ # Get all available screens
280
+ screen_manager = ScreenManager()
281
+ all_screens = screen_manager.get_all_screens()
282
+ # Choose a screen for task execution
283
+ screen_index = 1 # Use the second screen as example
284
+ target_screen = all_screens[screen_index]
285
+ # Set the target screen for handlers
286
+ action_handler.set_target_screen(target_screen)
287
+ image_provider.set_target_screen(target_screen)
288
+ completed = await agent.execute(
289
+ "Search weather on Google",
290
+ action_handler=action_handler,
291
+ image_provider=image_provider,
292
+ )
293
+ return completed
294
+
295
+ asyncio.run(main())
296
+ ```
297
+
238
298
  ## Examples
239
299
 
240
300
  See the [`examples/`](examples/) directory for more usage patterns:
241
301
  - `execute_task_auto.py` - Automated task execution with `AsyncDefaultAgent`
242
302
  - `execute_task_manual.py` - Manual step-by-step control with `Actor`
303
+ - `multi_screen_execution.py` - Automated task execution on multi-screen environments
243
304
  - `continued_session.py` - Continuing tasks across sessions
244
305
  - `screenshot_with_config.py` - Image compression and optimization
245
306
  - `socketio_server_basic.py` - Socket.IO server example
@@ -0,0 +1,74 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ import asyncio
9
+ import sys
10
+
11
+ from oagi import ScreenManager
12
+
13
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
14
+ if sys.platform == "win32":
15
+ ScreenManager.enable_windows_dpi_awareness()
16
+
17
+ from oagi import (
18
+ AsyncDefaultAgent,
19
+ AsyncPyautoguiActionHandler,
20
+ AsyncScreenshotMaker,
21
+ )
22
+
23
+
24
+ def print_all_screens():
25
+ """Print all available screens."""
26
+ screen_manager = ScreenManager()
27
+ all_screens = screen_manager.get_all_screens()
28
+ print("Available screens:")
29
+ for screen_index, screen in enumerate(all_screens):
30
+ print(f" - Index {screen_index}: {screen}")
31
+
32
+
33
+ def execute_task_on_specific_screen(task_desc, max_steps=5, screen_index=0):
34
+ """Synchronous wrapper for async task execution."""
35
+ # Run the async implementation to completion and return its result
36
+ return asyncio.run(
37
+ async_execute_task_on_specific_screen(task_desc, max_steps, screen_index)
38
+ )
39
+
40
+
41
+ async def async_execute_task_on_specific_screen(task_desc, max_steps=5, screen_index=0):
42
+ # set OAGI_API_KEY and OAGI_BASE_URL
43
+ # or AsyncDefaultAgent(api_key="your_api_key", base_url="your_base_url")
44
+ agent = AsyncDefaultAgent(max_steps=max_steps)
45
+
46
+ # executor = lambda actions: print(actions) for debugging
47
+ action_handler = AsyncPyautoguiActionHandler()
48
+ image_provider = AsyncScreenshotMaker()
49
+
50
+ # Get the target screen info for task execution
51
+ screen_manager = ScreenManager()
52
+ all_screens = screen_manager.get_all_screens()
53
+ screen = all_screens[screen_index]
54
+ # Set the target screen for handlers
55
+ action_handler.set_target_screen(screen)
56
+ image_provider.set_target_screen(screen)
57
+
58
+ is_completed = await agent.execute(
59
+ task_desc,
60
+ action_handler=action_handler,
61
+ image_provider=image_provider,
62
+ )
63
+
64
+ return is_completed, await image_provider.last_image()
65
+
66
+
67
+ if __name__ == "__main__":
68
+ # Example task
69
+ task_desc = "Open Chrome and navigate to google.com"
70
+ screen_index = 1 # Use the second screen as example
71
+ success, image = execute_task_on_specific_screen(
72
+ task_desc, screen_index=screen_index
73
+ )
74
+ print(f"\nFinal result: {'Success' if success else 'Failed'}")
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi"
7
- version = "0.13.2"
7
+ version = "0.14.1"
8
8
  description = "Official API of OpenAGI Foundation (metapackage with all features)"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -16,7 +16,7 @@ authors = [
16
16
  requires-python = ">= 3.10"
17
17
 
18
18
  dependencies = [
19
- "oagi-core[desktop,server]==0.13.2",
19
+ "oagi-core[desktop,server]==0.14.1",
20
20
  ]
21
21
 
22
22
  [project.urls]
@@ -516,20 +516,29 @@ dependencies = [
516
516
  ]
517
517
  sdist = { url = "https://files.pythonhosted.org/packages/28/fa/b2ba8229b9381e8f6381c1dcae6f4159a7f72349e414ed19cfbbd1817173/MouseInfo-0.1.3.tar.gz", hash = "sha256:2c62fb8885062b8e520a3cce0a297c657adcc08c60952eb05bc8256ef6f7f6e7", size = 10850, upload-time = "2020-03-27T21:20:10.136Z" }
518
518
 
519
+ [[package]]
520
+ name = "mss"
521
+ version = "10.1.0"
522
+ source = { registry = "https://pypi.org/simple" }
523
+ sdist = { url = "https://files.pythonhosted.org/packages/40/ca/49b67437a8c46d9732c9c274d7b1fc0c181cfe290d699a0c5e94701dfe79/mss-10.1.0.tar.gz", hash = "sha256:7182baf7ee16ca569e2804028b6ab9bcbf6be5c46fc2880840f33b513b9cb4f8", size = 84200, upload-time = "2025-08-16T12:11:00.119Z" }
524
+ wheels = [
525
+ { url = "https://files.pythonhosted.org/packages/23/28/1e3e5cd1d677cca68b26166f704f72e35b1e8b6d5076d8ebeebc4e40a649/mss-10.1.0-py3-none-any.whl", hash = "sha256:9179c110cadfef5dc6dc4a041a0cd161c74c379218648e6640b48c6b5cfe8918", size = 24525, upload-time = "2025-08-16T12:10:59.111Z" },
526
+ ]
527
+
519
528
  [[package]]
520
529
  name = "oagi"
521
- version = "0.13.2"
530
+ version = "0.14.1"
522
531
  source = { editable = "." }
523
532
  dependencies = [
524
533
  { name = "oagi-core", extra = ["desktop", "server"] },
525
534
  ]
526
535
 
527
536
  [package.metadata]
528
- requires-dist = [{ name = "oagi-core", extras = ["desktop", "server"], specifier = "==0.13.1" }]
537
+ requires-dist = [{ name = "oagi-core", extras = ["desktop", "server"], specifier = "==0.14.0" }]
529
538
 
530
539
  [[package]]
531
540
  name = "oagi-core"
532
- version = "0.13.1"
541
+ version = "0.14.0"
533
542
  source = { registry = "https://pypi.org/simple" }
534
543
  dependencies = [
535
544
  { name = "httpx" },
@@ -537,13 +546,14 @@ dependencies = [
537
546
  { name = "pydantic" },
538
547
  { name = "rich" },
539
548
  ]
540
- sdist = { url = "https://files.pythonhosted.org/packages/62/b0/6006c57afc693a4afdc5389f1c93e83654745ec2b25d33c091621aec7d15/oagi_core-0.13.1.tar.gz", hash = "sha256:8dcbf45c6eb7203573d04122ebb53c67d7c955bb6bbfe8332fe2e641e7cb689f", size = 304385, upload-time = "2026-01-12T04:14:18.758Z" }
549
+ sdist = { url = "https://files.pythonhosted.org/packages/4d/2f/74a0a8df68ad16e66f5d84df40c439d4de059b9d2cce8b95972f7e9ff7a6/oagi_core-0.14.0.tar.gz", hash = "sha256:d72f0e81b6427ec08642678df297ff1b73b40c856d4c78df33d7e541af3bd6fd", size = 309074, upload-time = "2026-01-20T15:24:11.961Z" }
541
550
  wheels = [
542
- { url = "https://files.pythonhosted.org/packages/6e/78/0a04d5a28313ab6dc783a5c4d8b32c1871fa2a140f75dfe42f56024c8865/oagi_core-0.13.1-py3-none-any.whl", hash = "sha256:b29637ca080fa7092224231b113b35c07f09657bed203e8e26e13e2c2a65665f", size = 110049, upload-time = "2026-01-12T04:14:17.645Z" },
551
+ { url = "https://files.pythonhosted.org/packages/22/c7/8061f05bf2f5225a0424ec890f50b8b38dc1e44489232a82a2405ea4a1c5/oagi_core-0.14.0-py3-none-any.whl", hash = "sha256:011a33b641dd5a881e9322bbb82e3394921574b8ce840e99b21f2de02f2d71c5", size = 114399, upload-time = "2026-01-20T15:24:10.801Z" },
543
552
  ]
544
553
 
545
554
  [package.optional-dependencies]
546
555
  desktop = [
556
+ { name = "mss", marker = "sys_platform == 'win32'" },
547
557
  { name = "pillow" },
548
558
  { name = "pyautogui" },
549
559
  { name = "pyobjc-framework-applicationservices", marker = "sys_platform == 'darwin'" },
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi-core"
7
- version = "0.13.2"
7
+ version = "0.14.1"
8
8
  description = "Official API of OpenAGI Foundation"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -33,7 +33,8 @@ desktop = [
33
33
  "pyautogui>=0.9.54",
34
34
  "pyobjc-framework-Quartz>=8.0; sys_platform == 'darwin'",
35
35
  "pyobjc-framework-ApplicationServices>=8.0; sys_platform == 'darwin'",
36
- "screeninfo>=0.8.1"
36
+ "screeninfo>=0.8.1",
37
+ "mss>=9.0.0; sys_platform == 'win32'",
37
38
  ]
38
39
  server = [
39
40
  "fastapi[standard]>=0.100.0",
@@ -81,6 +81,7 @@ _LAZY_IMPORTS_DATA: dict[str, tuple[str, str | None, str | None]] = {
81
81
  "screeninfo",
82
82
  "desktop",
83
83
  ),
84
+ "ScreenManager": ("oagi.handler.screen_manager", None, None),
84
85
  }
85
86
 
86
87
  if TYPE_CHECKING:
@@ -95,6 +96,7 @@ if TYPE_CHECKING:
95
96
  PyautoguiActionHandler,
96
97
  PyautoguiConfig,
97
98
  )
99
+ from oagi.handler.screen_manager import ScreenManager
98
100
  from oagi.handler.screenshot_maker import ScreenshotMaker
99
101
  from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
100
102
  from oagi.server.config import ServerConfig
@@ -170,4 +172,6 @@ __all__ = [
170
172
  "AsyncYdotoolActionHandler",
171
173
  "YdotoolActionHandler",
172
174
  "YdotoolConfig",
175
+ # Lazy imports - Screen manager
176
+ "ScreenManager",
173
177
  ]
@@ -6,7 +6,6 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
- import asyncio
10
9
  import logging
11
10
 
12
11
  from .. import AsyncActor
@@ -16,7 +15,7 @@ from ..constants import (
16
15
  DEFAULT_TEMPERATURE,
17
16
  MODEL_ACTOR,
18
17
  )
19
- from ..handler.utils import reset_handler
18
+ from ..handler.utils import configure_handler_delay, reset_handler
20
19
  from ..types import (
21
20
  ActionEvent,
22
21
  AsyncActionHandler,
@@ -72,6 +71,9 @@ class AsyncDefaultAgent:
72
71
  # Reset handler state at automation start
73
72
  reset_handler(action_handler)
74
73
 
74
+ # Configure handler's post_batch_delay from agent's step_delay
75
+ configure_handler_delay(action_handler, self.step_delay)
76
+
75
77
  for i in range(self.max_steps):
76
78
  step_num = i + 1
77
79
  logger.debug(f"Executing step {step_num}/{self.max_steps}")
@@ -127,10 +129,6 @@ class AsyncDefaultAgent:
127
129
  )
128
130
  )
129
131
 
130
- # Wait after actions before next screenshot
131
- if self.step_delay > 0:
132
- await asyncio.sleep(self.step_delay)
133
-
134
132
  # Check if task is complete
135
133
  if step.stop:
136
134
  logger.info(f"Task completed successfully after {step_num} steps")
@@ -6,7 +6,6 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
- import asyncio
10
9
  import logging
11
10
  from datetime import datetime
12
11
  from typing import Any
@@ -19,7 +18,7 @@ from oagi.constants import (
19
18
  DEFAULT_TEMPERATURE,
20
19
  MODEL_ACTOR,
21
20
  )
22
- from oagi.handler.utils import reset_handler
21
+ from oagi.handler.utils import configure_handler_delay, reset_handler
23
22
  from oagi.types import (
24
23
  URL,
25
24
  ActionEvent,
@@ -126,6 +125,9 @@ class TaskeeAgent(AsyncAgent):
126
125
  # Reset handler state at todo execution start
127
126
  reset_handler(action_handler)
128
127
 
128
+ # Configure handler's post_batch_delay from agent's step_delay
129
+ configure_handler_delay(action_handler, self.step_delay)
130
+
129
131
  self.current_todo = instruction
130
132
  self.actions = []
131
133
  self.total_actions = 0
@@ -355,10 +357,6 @@ class TaskeeAgent(AsyncAgent):
355
357
  self.total_actions += len(step.actions)
356
358
  self.since_reflection += len(step.actions)
357
359
 
358
- # Wait after actions before next screenshot
359
- if self.step_delay > 0:
360
- await asyncio.sleep(self.step_delay)
361
-
362
360
  steps_taken += 1
363
361
 
364
362
  # Check if task is complete
@@ -86,6 +86,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
86
86
  type=float,
87
87
  help=f"Delay in seconds after each step before next screenshot (default: {DEFAULT_STEP_DELAY})",
88
88
  )
89
+ run_parser.add_argument(
90
+ "--screen-index",
91
+ type=int,
92
+ help="Choose the index of screen to run the task",
93
+ )
89
94
 
90
95
  # agent modes command
91
96
  agent_subparsers.add_parser("modes", help="List available agent modes")
@@ -96,6 +101,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
96
101
  help="Check macOS permissions for screen recording and accessibility",
97
102
  )
98
103
 
104
+ # agent screens command
105
+ agent_subparsers.add_parser(
106
+ "screens", help="List all available screens for agent execution"
107
+ )
108
+
99
109
 
100
110
  def handle_agent_command(args: argparse.Namespace) -> None:
101
111
  if args.agent_command == "run":
@@ -104,6 +114,19 @@ def handle_agent_command(args: argparse.Namespace) -> None:
104
114
  list_modes()
105
115
  elif args.agent_command == "permission":
106
116
  check_permissions()
117
+ elif args.agent_command == "screens":
118
+ list_screens()
119
+
120
+
121
+ def list_screens() -> None:
122
+ """List all available screens for agent execution."""
123
+ from oagi import ScreenManager # noqa: PLC0415
124
+
125
+ screen_manager = ScreenManager()
126
+ screens = screen_manager.get_all_screens()
127
+ print("Available screens:")
128
+ for screen_index, screen in enumerate(screens):
129
+ print(f" - Index {screen_index}: {screen}")
107
130
 
108
131
 
109
132
  def list_modes() -> None:
@@ -212,6 +235,22 @@ def run_agent(args: argparse.Namespace) -> None:
212
235
  from oagi.agent import create_agent # noqa: PLC0415
213
236
  from oagi.handler.wayland_support import is_wayland_display_server # noqa: PLC0415
214
237
 
238
+ # Create screen manager for multi-screen support
239
+ # Must be initialized before importing pyautogui to ensure correct DPI awareness in Windows
240
+ target_screen = None
241
+ if args.screen_index is not None:
242
+ from oagi.handler import ScreenManager # noqa: PLC0415
243
+
244
+ screen_index = args.screen_index
245
+ screen_manager = ScreenManager()
246
+ all_screens = screen_manager.get_all_screens()
247
+ if screen_index >= len(all_screens) or screen_index < 0:
248
+ raise ValueError(
249
+ f"Error: Screen index {screen_index} not found. Available screen indices: {list(range(len(all_screens)))}"
250
+ )
251
+ target_screen = all_screens[screen_index]
252
+ print(f"Target screen: {target_screen}")
253
+
215
254
  # Select appropriate action handler based on display server
216
255
  if is_wayland_display_server():
217
256
  check_optional_dependency("screeninfo", "Agent execution (Wayland)", "desktop")
@@ -280,6 +319,11 @@ def run_agent(args: argparse.Namespace) -> None:
280
319
  # Create image provider
281
320
  image_provider = AsyncScreenshotMaker()
282
321
 
322
+ if target_screen:
323
+ # Set the target screen for the image and action provider
324
+ image_provider.set_target_screen(target_screen)
325
+ action_handler.set_target_screen(target_screen)
326
+
283
327
  if args.instruction:
284
328
  print(f"Starting agent with instruction: {args.instruction}")
285
329
  else:
@@ -35,7 +35,7 @@ DEFAULT_REFLECTION_INTERVAL = 4
35
35
  DEFAULT_REFLECTION_INTERVAL_TASKER = 20
36
36
 
37
37
  # Timing & Delays
38
- DEFAULT_STEP_DELAY = 0.3
38
+ DEFAULT_STEP_DELAY = 1.0
39
39
 
40
40
  # Temperature Defaults
41
41
  DEFAULT_TEMPERATURE = 0.5
@@ -21,6 +21,7 @@ _LAZY_IMPORTS: dict[str, str] = {
21
21
  "AsyncYdotoolActionHandler": "oagi.handler.async_ydotool_action_handler",
22
22
  "YdotoolActionHandler": "oagi.handler.ydotool_action_handler",
23
23
  "YdotoolConfig": "oagi.handler.ydotool_action_handler",
24
+ "ScreenManager": "oagi.handler.screen_manager",
24
25
  }
25
26
 
26
27
  if TYPE_CHECKING:
@@ -32,6 +33,7 @@ if TYPE_CHECKING:
32
33
  PyautoguiActionHandler,
33
34
  PyautoguiConfig,
34
35
  )
36
+ from oagi.handler.screen_manager import ScreenManager
35
37
  from oagi.handler.screenshot_maker import ScreenshotMaker
36
38
  from oagi.handler.ydotool_action_handler import YdotoolActionHandler, YdotoolConfig
37
39
 
@@ -60,4 +62,5 @@ __all__ = [
60
62
  "YdotoolConfig",
61
63
  "YdotoolActionHandler",
62
64
  "AsyncYdotoolActionHandler",
65
+ "ScreenManager",
63
66
  ]
@@ -8,6 +8,8 @@
8
8
 
9
9
  import asyncio
10
10
 
11
+ from oagi.handler.screen_manager import Screen
12
+
11
13
  from ..types import Action
12
14
  from .pyautogui_action_handler import PyautoguiActionHandler, PyautoguiConfig
13
15
 
@@ -27,7 +29,16 @@ class AsyncPyautoguiActionHandler:
27
29
  config: PyautoguiConfig instance for customizing behavior
28
30
  """
29
31
  self.sync_handler = PyautoguiActionHandler(config=config)
30
- self.config = config or PyautoguiConfig()
32
+ # Share the same config object so configure_handler_delay() works
33
+ self.config = self.sync_handler.config
34
+
35
+ def set_target_screen(self, screen: Screen) -> None:
36
+ """Set the target screen for the action handler.
37
+
38
+ Args:
39
+ screen (Screen): The screen object to set as the target.
40
+ """
41
+ self.sync_handler.set_target_screen(screen)
31
42
 
32
43
  def reset(self):
33
44
  """Reset handler state.