oagi-core 0.10.3__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. {oagi_core-0.10.3 → oagi_core-0.11.0}/PKG-INFO +34 -1
  2. {oagi_core-0.10.3 → oagi_core-0.11.0}/README.md +33 -0
  3. {oagi_core-0.10.3 → oagi_core-0.11.0}/metapackage/pyproject.toml +2 -2
  4. {oagi_core-0.10.3 → oagi_core-0.11.0}/metapackage/uv.lock +5 -5
  5. {oagi_core-0.10.3 → oagi_core-0.11.0}/pyproject.toml +1 -1
  6. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/default.py +4 -0
  7. oagi_core-0.11.0/src/oagi/agent/factories.py +162 -0
  8. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/tasker/planner.py +17 -7
  9. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/tasker/taskee_agent.py +19 -4
  10. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/tasker/tasker_agent.py +4 -0
  11. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/agent.py +54 -30
  12. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/handler/__init__.py +16 -0
  13. oagi_core-0.11.0/src/oagi/handler/_macos.py +192 -0
  14. oagi_core-0.11.0/src/oagi/handler/_windows.py +101 -0
  15. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/handler/async_pyautogui_action_handler.py +8 -0
  16. oagi_core-0.11.0/src/oagi/handler/capslock_manager.py +55 -0
  17. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/handler/pyautogui_action_handler.py +21 -39
  18. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/session_store.py +3 -3
  19. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/socketio_server.py +4 -4
  20. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/__init__.py +2 -1
  21. oagi_core-0.11.0/src/oagi/types/url.py +28 -0
  22. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_agent_registry.py +4 -3
  23. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_pyautogui_action_handler.py +102 -29
  24. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_server/test_session_store.py +2 -2
  25. {oagi_core-0.10.3 → oagi_core-0.11.0}/uv.lock +1 -1
  26. oagi_core-0.10.3/src/oagi/agent/factories.py +0 -90
  27. oagi_core-0.10.3/src/oagi/handler/_macos.py +0 -55
  28. oagi_core-0.10.3/src/oagi/types/url.py +0 -3
  29. {oagi_core-0.10.3 → oagi_core-0.11.0}/.github/workflows/ci.yml +0 -0
  30. {oagi_core-0.10.3 → oagi_core-0.11.0}/.github/workflows/release.yml +0 -0
  31. {oagi_core-0.10.3 → oagi_core-0.11.0}/.gitignore +0 -0
  32. {oagi_core-0.10.3 → oagi_core-0.11.0}/.python-version +0 -0
  33. {oagi_core-0.10.3 → oagi_core-0.11.0}/CONTRIBUTING.md +0 -0
  34. {oagi_core-0.10.3 → oagi_core-0.11.0}/LICENSE +0 -0
  35. {oagi_core-0.10.3 → oagi_core-0.11.0}/Makefile +0 -0
  36. {oagi_core-0.10.3 → oagi_core-0.11.0}/examples/async_google_weather.py +0 -0
  37. {oagi_core-0.10.3 → oagi_core-0.11.0}/examples/execute_task_auto.py +0 -0
  38. {oagi_core-0.10.3 → oagi_core-0.11.0}/examples/execute_task_manual.py +0 -0
  39. {oagi_core-0.10.3 → oagi_core-0.11.0}/examples/google_weather.py +0 -0
  40. {oagi_core-0.10.3 → oagi_core-0.11.0}/examples/screenshot_with_config.py +0 -0
  41. {oagi_core-0.10.3 → oagi_core-0.11.0}/examples/tasker_agent_example.py +0 -0
  42. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/__init__.py +0 -0
  43. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/__init__.py +0 -0
  44. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/observer/__init__.py +0 -0
  45. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/observer/agent_observer.py +0 -0
  46. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/observer/events.py +0 -0
  47. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/observer/exporters.py +0 -0
  48. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/observer/protocol.py +0 -0
  49. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/observer/report_template.html +0 -0
  50. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/protocol.py +0 -0
  51. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/registry.py +0 -0
  52. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/tasker/__init__.py +0 -0
  53. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/tasker/memory.py +0 -0
  54. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/agent/tasker/models.py +0 -0
  55. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/__init__.py +0 -0
  56. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/display.py +0 -0
  57. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/main.py +0 -0
  58. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/server.py +0 -0
  59. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/tracking.py +0 -0
  60. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/cli/utils.py +0 -0
  61. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/client/__init__.py +0 -0
  62. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/client/async_.py +0 -0
  63. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/client/base.py +0 -0
  64. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/client/sync.py +0 -0
  65. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/constants.py +0 -0
  66. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/exceptions.py +0 -0
  67. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/handler/async_screenshot_maker.py +0 -0
  68. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/handler/pil_image.py +0 -0
  69. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/handler/screenshot_maker.py +0 -0
  70. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/logging.py +0 -0
  71. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/__init__.py +0 -0
  72. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/agent_wrappers.py +0 -0
  73. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/config.py +0 -0
  74. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/main.py +0 -0
  75. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/server/models.py +0 -0
  76. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/task/__init__.py +0 -0
  77. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/task/async_.py +0 -0
  78. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/task/async_short.py +0 -0
  79. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/task/base.py +0 -0
  80. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/task/short.py +0 -0
  81. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/task/sync.py +0 -0
  82. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/action_handler.py +0 -0
  83. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/async_action_handler.py +0 -0
  84. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/async_image_provider.py +0 -0
  85. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/image.py +0 -0
  86. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/image_provider.py +0 -0
  87. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/models/__init__.py +0 -0
  88. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/models/action.py +0 -0
  89. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/models/client.py +0 -0
  90. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/models/image_config.py +0 -0
  91. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/models/step.py +0 -0
  92. {oagi_core-0.10.3 → oagi_core-0.11.0}/src/oagi/types/step_observer.py +0 -0
  93. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/__init__.py +0 -0
  94. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/conftest.py +0 -0
  95. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_action_parsing.py +0 -0
  96. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_actor.py +0 -0
  97. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_agent/test_agent_wrappers.py +0 -0
  98. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_agent/test_default_agent.py +0 -0
  99. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_async_actor.py +0 -0
  100. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_async_client.py +0 -0
  101. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_async_handlers.py +0 -0
  102. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_cli.py +0 -0
  103. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_logging.py +0 -0
  104. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_mac_double_click.py +0 -0
  105. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_observer.py +0 -0
  106. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_pil_image.py +0 -0
  107. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_planner.py +0 -0
  108. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_planner_memory.py +0 -0
  109. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_screenshot_maker.py +0 -0
  110. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_server/__init__.py +0 -0
  111. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_server/test_config.py +0 -0
  112. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_server/test_socketio_integration.py +0 -0
  113. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_sync_client.py +0 -0
  114. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_taskee_agent.py +0 -0
  115. {oagi_core-0.10.3 → oagi_core-0.11.0}/tests/test_tasker_agent.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: oagi-core
3
- Version: 0.10.3
3
+ Version: 0.11.0
4
4
  Summary: Official API of OpenAGI Foundation
5
5
  Project-URL: Homepage, https://github.com/agiopen-org/oagi
6
6
  Author-email: OpenAGI Foundation <contact@agiopen.org>
@@ -116,6 +116,39 @@ config = PyautoguiConfig(
116
116
  action_handler = AsyncPyautoguiActionHandler(config=config)
117
117
  ```
118
118
 
119
+ ### Command Line Interface
120
+
121
+ Run agents directly from the terminal:
122
+
123
+ ```bash
124
+ # Run with actor model
125
+ oagi agent run "Go to nasdaq.com, search for AAPL. Under More, go to Insider Activity" --model lux-actor-1
126
+
127
+ # Run with the thinker model (lux-thinker-1, which gets more steps by default)
128
+ oagi agent run "Look up the store hours for the nearest Apple Store to zip code 23456 using the Apple Store Locator" --model lux-thinker-1
129
+
130
+ # Run pre-configured tasker workflows (no instruction needed)
131
+ oagi agent run --mode tasker:software_qa
132
+
133
+ # List all available modes
134
+ oagi agent modes
135
+
136
+ # Check macOS permissions (screen recording & accessibility)
137
+ oagi agent permission
138
+
139
+ # Export execution history
140
+ oagi agent run "Complete the form" --export html --export-file report.html
141
+ ```
142
+
143
+ CLI options:
144
+ - `--mode`: Agent mode (default: actor). Use `oagi agent modes` to list available modes
145
+ - `--model`: Override the model (default: determined by mode)
146
+ - `--max-steps`: Maximum steps (default: determined by mode)
147
+ - `--temperature`: Sampling temperature (default: determined by mode)
148
+ - `--step-delay`: Delay after each action before next screenshot (default: 0.3s)
149
+ - `--export`: Export format (markdown, html, json)
150
+ - `--export-file`: Output file path for export
151
+
119
152
  ### Image Processing
120
153
 
121
154
  Process and optimize images before sending to API:
@@ -73,6 +73,39 @@ config = PyautoguiConfig(
73
73
  action_handler = AsyncPyautoguiActionHandler(config=config)
74
74
  ```
75
75
 
76
+ ### Command Line Interface
77
+
78
+ Run agents directly from the terminal:
79
+
80
+ ```bash
81
+ # Run with actor model
82
+ oagi agent run "Go to nasdaq.com, search for AAPL. Under More, go to Insider Activity" --model lux-actor-1
83
+
84
+ # Run with the thinker model (lux-thinker-1, which gets more steps by default)
85
+ oagi agent run "Look up the store hours for the nearest Apple Store to zip code 23456 using the Apple Store Locator" --model lux-thinker-1
86
+
87
+ # Run pre-configured tasker workflows (no instruction needed)
88
+ oagi agent run --mode tasker:software_qa
89
+
90
+ # List all available modes
91
+ oagi agent modes
92
+
93
+ # Check macOS permissions (screen recording & accessibility)
94
+ oagi agent permission
95
+
96
+ # Export execution history
97
+ oagi agent run "Complete the form" --export html --export-file report.html
98
+ ```
99
+
100
+ CLI options:
101
+ - `--mode`: Agent mode (default: actor). Use `oagi agent modes` to list available modes
102
+ - `--model`: Override the model (default: determined by mode)
103
+ - `--max-steps`: Maximum steps (default: determined by mode)
104
+ - `--temperature`: Sampling temperature (default: determined by mode)
105
+ - `--step-delay`: Delay after each action before next screenshot (default: 0.3s)
106
+ - `--export`: Export format (markdown, html, json)
107
+ - `--export-file`: Output file path for export
108
+
76
109
  ### Image Processing
77
110
 
78
111
  Process and optimize images before sending to API:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi"
7
- version = "0.10.3"
7
+ version = "0.11.0"
8
8
  description = "Official API of OpenAGI Foundation (metapackage with all features)"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -16,7 +16,7 @@ authors = [
16
16
  requires-python = ">= 3.10"
17
17
 
18
18
  dependencies = [
19
- "oagi-core[desktop,server]==0.10.3",
19
+ "oagi-core[desktop,server]==0.11.0",
20
20
  ]
21
21
 
22
22
  [project.urls]
@@ -397,27 +397,27 @@ sdist = { url = "https://files.pythonhosted.org/packages/28/fa/b2ba8229b9381e8f6
397
397
 
398
398
  [[package]]
399
399
  name = "oagi"
400
- version = "0.10.3"
400
+ version = "0.11.0"
401
401
  source = { editable = "." }
402
402
  dependencies = [
403
403
  { name = "oagi-core", extra = ["desktop", "server"] },
404
404
  ]
405
405
 
406
406
  [package.metadata]
407
- requires-dist = [{ name = "oagi-core", extras = ["desktop", "server"], specifier = "==0.10.2" }]
407
+ requires-dist = [{ name = "oagi-core", extras = ["desktop", "server"], specifier = "==0.10.3" }]
408
408
 
409
409
  [[package]]
410
410
  name = "oagi-core"
411
- version = "0.10.2"
411
+ version = "0.10.3"
412
412
  source = { registry = "https://pypi.org/simple" }
413
413
  dependencies = [
414
414
  { name = "httpx" },
415
415
  { name = "pydantic" },
416
416
  { name = "rich" },
417
417
  ]
418
- sdist = { url = "https://files.pythonhosted.org/packages/07/2f/11b7e37049b2faa1e1147a75624c024a93c32287f243c213ca96fc096452/oagi_core-0.10.2.tar.gz", hash = "sha256:2d7fb47031cdc2155ea5ea9c06b4b58c0d70091de95a36fa4dedf5d710a09862", size = 267130, upload-time = "2025-11-26T13:50:36.977Z" }
418
+ sdist = { url = "https://files.pythonhosted.org/packages/60/97/c54ecd43969132f902086d4f7fa7eb8d1f5e5087774c81df463a27b3017f/oagi_core-0.10.3.tar.gz", hash = "sha256:46417fde3b20427338d7e2798246960fd4bc6515e93f95bd32b8236337d6cfcd", size = 268408, upload-time = "2025-11-30T11:34:46.494Z" }
419
419
  wheels = [
420
- { url = "https://files.pythonhosted.org/packages/d0/c8/a1d95327afe6237eaf474d74ba66461e51baaf5948d458cb12be62dbf8a8/oagi_core-0.10.2-py3-none-any.whl", hash = "sha256:3b9dd3ade24a1c605d671ed7f35efcd00f729be603a63173c7f92f8177d56750", size = 87220, upload-time = "2025-11-26T13:50:35.872Z" },
420
+ { url = "https://files.pythonhosted.org/packages/81/22/47271b7d2ac5b7bcaa9d58819b00dfa8e3c0aa57ea3db6e2f6fd681fd0ce/oagi_core-0.10.3-py3-none-any.whl", hash = "sha256:fc91a7bb29ffdcf490bb7edc2574826229ba86726ec6acd505379bdf09645721", size = 88927, upload-time = "2025-11-30T11:34:45.531Z" },
421
421
  ]
422
422
 
423
423
  [package.optional-dependencies]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "oagi-core"
7
- version = "0.10.3"
7
+ version = "0.11.0"
8
8
  description = "Official API of OpenAGI Foundation"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -16,6 +16,7 @@ from ..constants import (
16
16
  DEFAULT_TEMPERATURE,
17
17
  MODEL_ACTOR,
18
18
  )
19
+ from ..handler import reset_handler
19
20
  from ..types import (
20
21
  ActionEvent,
21
22
  AsyncActionHandler,
@@ -68,6 +69,9 @@ class AsyncDefaultAgent:
68
69
  logger.info(f"Starting async task execution: {instruction}")
69
70
  await self.actor.init_task(instruction, max_steps=self.max_steps)
70
71
 
72
+ # Reset handler state at automation start
73
+ reset_handler(action_handler)
74
+
71
75
  for i in range(self.max_steps):
72
76
  step_num = i + 1
73
77
  logger.debug(f"Executing step {step_num}/{self.max_steps}")
@@ -0,0 +1,162 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ from oagi.agent.tasker import TaskerAgent
9
+ from oagi.constants import (
10
+ DEFAULT_MAX_STEPS,
11
+ DEFAULT_MAX_STEPS_TASKER,
12
+ DEFAULT_MAX_STEPS_THINKER,
13
+ DEFAULT_REFLECTION_INTERVAL_TASKER,
14
+ DEFAULT_STEP_DELAY,
15
+ DEFAULT_TEMPERATURE_LOW,
16
+ MODEL_ACTOR,
17
+ MODEL_THINKER,
18
+ )
19
+ from oagi.types import AsyncStepObserver
20
+
21
+ from .default import AsyncDefaultAgent
22
+ from .protocol import AsyncAgent
23
+ from .registry import async_agent_register
24
+
25
+
26
+ @async_agent_register(mode="actor")
27
+ def create_default_agent(
28
+ api_key: str | None = None,
29
+ base_url: str | None = None,
30
+ model: str = MODEL_ACTOR,
31
+ max_steps: int = DEFAULT_MAX_STEPS,
32
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
33
+ step_observer: AsyncStepObserver | None = None,
34
+ step_delay: float = DEFAULT_STEP_DELAY,
35
+ ) -> AsyncAgent:
36
+ return AsyncDefaultAgent(
37
+ api_key=api_key,
38
+ base_url=base_url,
39
+ model=model,
40
+ max_steps=max_steps,
41
+ temperature=temperature,
42
+ step_observer=step_observer,
43
+ step_delay=step_delay,
44
+ )
45
+
46
+
47
+ @async_agent_register(mode="thinker")
48
+ def create_thinker_agent(
49
+ api_key: str | None = None,
50
+ base_url: str | None = None,
51
+ model: str = MODEL_THINKER,
52
+ max_steps: int = DEFAULT_MAX_STEPS_THINKER,
53
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
54
+ step_observer: AsyncStepObserver | None = None,
55
+ step_delay: float = DEFAULT_STEP_DELAY,
56
+ ) -> AsyncAgent:
57
+ return AsyncDefaultAgent(
58
+ api_key=api_key,
59
+ base_url=base_url,
60
+ model=model,
61
+ max_steps=max_steps,
62
+ temperature=temperature,
63
+ step_observer=step_observer,
64
+ step_delay=step_delay,
65
+ )
66
+
67
+
68
+ @async_agent_register(mode="tasker:cvs_appointment")
69
+ def create_cvs_appointment_agent(
70
+ api_key: str | None = None,
71
+ base_url: str | None = None,
72
+ model: str = MODEL_ACTOR,
73
+ max_steps: int = DEFAULT_MAX_STEPS_TASKER,
74
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
75
+ reflection_interval: int = DEFAULT_REFLECTION_INTERVAL_TASKER,
76
+ step_observer: AsyncStepObserver | None = None,
77
+ step_delay: float = DEFAULT_STEP_DELAY,
78
+ # CVS-specific parameters
79
+ first_name: str = "First",
80
+ last_name: str = "Last",
81
+ email: str = "user@example.com",
82
+ birthday: str = "01-01-1990", # MM-DD-YYYY
83
+ zip_code: str = "00000",
84
+ ) -> AsyncAgent:
85
+ tasker = TaskerAgent(
86
+ api_key=api_key,
87
+ base_url=base_url,
88
+ model=model,
89
+ max_steps=max_steps,
90
+ temperature=temperature,
91
+ reflection_interval=reflection_interval,
92
+ step_observer=step_observer,
93
+ step_delay=step_delay,
94
+ )
95
+
96
+ month, day, year = birthday.split("-")
97
+ instruction = (
98
+ f"Schedule an appointment at CVS for {first_name} {last_name} "
99
+ f"with email {email} and birthday {birthday}"
100
+ )
101
+ todos = [
102
+ "Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, "
103
+ "wait for the page to load, then click on the button of Schedule vaccinations on the "
104
+ "top of the page",
105
+ f"Enter the first name '{first_name}', last name '{last_name}', and email '{email}' "
106
+ "in the form. Do not use any suggested autofills. Make sure the mobile phone number "
107
+ "is empty.",
108
+ f"Slightly scroll down to see the date of birth, enter Month '{month}', Day '{day}', "
109
+ f"and Year '{year}' in the form",
110
+ "Click on 'Continue as guest' button, wait for the page to load with wait, "
111
+ "click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
112
+ f"Click on 'next' to enter the page with recommendation vaccines, then click on "
113
+ f"'next' again, until on the page of entering zip code, enter '{zip_code}', select "
114
+ "the first option from the dropdown menu, and click on 'Search'",
115
+ ]
116
+
117
+ tasker.set_task(instruction, todos)
118
+ return tasker
119
+
120
+
121
+ @async_agent_register(mode="tasker:software_qa")
122
+ def create_software_qa_agent(
123
+ api_key: str | None = None,
124
+ base_url: str | None = None,
125
+ model: str = MODEL_ACTOR,
126
+ max_steps: int = DEFAULT_MAX_STEPS_TASKER,
127
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
128
+ reflection_interval: int = DEFAULT_REFLECTION_INTERVAL_TASKER,
129
+ step_observer: AsyncStepObserver | None = None,
130
+ step_delay: float = DEFAULT_STEP_DELAY,
131
+ ) -> AsyncAgent:
132
+ tasker = TaskerAgent(
133
+ api_key=api_key,
134
+ base_url=base_url,
135
+ model=model,
136
+ max_steps=max_steps,
137
+ temperature=temperature,
138
+ reflection_interval=reflection_interval,
139
+ step_observer=step_observer,
140
+ step_delay=step_delay,
141
+ )
142
+
143
+ instruction = "QA: click through every sidebar button in the Nuclear Player UI"
144
+ todos = [
145
+ "Click on 'Dashboard' in the left sidebar",
146
+ "Click on 'Downloads' in the left sidebar",
147
+ "Click on 'Lyrics' in the left sidebar",
148
+ "Click on 'Plugins' in the left sidebar",
149
+ "Click on 'Search Results' in the left sidebar",
150
+ "Click on 'Settings' in the left sidebar",
151
+ "Click on 'Equalizer' in the left sidebar",
152
+ "Click on 'Visualizer' in the left sidebar",
153
+ "Click on 'Listening History' in the left sidebar",
154
+ "Click on 'Favorite Albums' in the left sidebar",
155
+ "Click on 'Favorite Tracks' in the left sidebar",
156
+ "Click on 'Favorite Artists' in the left sidebar",
157
+ "Click on 'Local Library' in the left sidebar",
158
+ "Click on 'Playlists' in the left sidebar",
159
+ ]
160
+
161
+ tasker.set_task(instruction, todos)
162
+ return tasker
@@ -11,7 +11,7 @@ from typing import Any
11
11
 
12
12
  from ...client import AsyncClient
13
13
  from ...constants import DEFAULT_REFLECTION_INTERVAL
14
- from ...types import URL, Image
14
+ from ...types import URL, Image, extract_uuid_from_url
15
15
  from .memory import PlannerMemory
16
16
  from .models import Action, PlannerOutput, ReflectionOutput
17
17
 
@@ -138,11 +138,16 @@ class Planner:
138
138
  # Ensure we have a client
139
139
  client = self._ensure_client()
140
140
 
141
- # Upload screenshot if provided
141
+ # Get screenshot UUID - either extract from URL or upload
142
142
  screenshot_uuid = None
143
143
  if screenshot:
144
- upload_response = await client.put_s3_presigned_url(screenshot)
145
- screenshot_uuid = upload_response.uuid
144
+ # Check if screenshot is already a URL (already uploaded to S3)
145
+ if isinstance(screenshot, str):
146
+ screenshot_uuid = extract_uuid_from_url(screenshot)
147
+ # If not a URL or UUID extraction failed, upload the image
148
+ if not screenshot_uuid:
149
+ upload_response = await client.put_s3_presigned_url(screenshot)
150
+ screenshot_uuid = upload_response.uuid
146
151
 
147
152
  # Extract memory data if provided
148
153
  (
@@ -195,11 +200,16 @@ class Planner:
195
200
  # Ensure we have a client
196
201
  client = self._ensure_client()
197
202
 
198
- # Upload screenshot if provided
203
+ # Get screenshot UUID - either extract from URL or upload
199
204
  result_screenshot_uuid = None
200
205
  if screenshot:
201
- upload_response = await client.put_s3_presigned_url(screenshot)
202
- result_screenshot_uuid = upload_response.uuid
206
+ # Check if screenshot is already a URL (already uploaded to S3)
207
+ if isinstance(screenshot, str):
208
+ result_screenshot_uuid = extract_uuid_from_url(screenshot)
209
+ # If not a URL or UUID extraction failed, upload the image
210
+ if not result_screenshot_uuid:
211
+ upload_response = await client.put_s3_presigned_url(screenshot)
212
+ result_screenshot_uuid = upload_response.uuid
203
213
 
204
214
  # Extract memory data if provided
205
215
  (
@@ -19,6 +19,7 @@ from oagi.constants import (
19
19
  DEFAULT_TEMPERATURE,
20
20
  MODEL_ACTOR,
21
21
  )
22
+ from oagi.handler import reset_handler
22
23
  from oagi.types import (
23
24
  URL,
24
25
  ActionEvent,
@@ -28,6 +29,7 @@ from oagi.types import (
28
29
  Image,
29
30
  PlanEvent,
30
31
  StepEvent,
32
+ extract_uuid_from_url,
31
33
  )
32
34
 
33
35
  from ..protocol import AsyncAgent
@@ -121,6 +123,9 @@ class TaskeeAgent(AsyncAgent):
121
123
  Returns:
122
124
  True if successful, False otherwise
123
125
  """
126
+ # Reset handler state at todo execution start
127
+ reset_handler(action_handler)
128
+
124
129
  self.current_todo = instruction
125
130
  self.actions = []
126
131
  self.total_actions = 0
@@ -256,11 +261,21 @@ class TaskeeAgent(AsyncAgent):
256
261
  # Capture screenshot
257
262
  screenshot = await image_provider()
258
263
 
259
- # Upload screenshot first to get UUID (avoids re-upload in actor.step)
264
+ # Get screenshot UUID - either extract from URL or upload
260
265
  try:
261
- upload_response = await client.put_s3_presigned_url(screenshot)
262
- screenshot_uuid = upload_response.uuid
263
- screenshot_url = upload_response.download_url
266
+ screenshot_uuid = None
267
+ screenshot_url = None
268
+
269
+ # Check if screenshot is already a URL (from SocketIOImageProvider)
270
+ if isinstance(screenshot, str):
271
+ screenshot_uuid = extract_uuid_from_url(screenshot)
272
+ screenshot_url = screenshot
273
+
274
+ # If not a URL or UUID extraction failed, upload the image
275
+ if not screenshot_uuid:
276
+ upload_response = await client.put_s3_presigned_url(screenshot)
277
+ screenshot_uuid = upload_response.uuid
278
+ screenshot_url = upload_response.download_url
264
279
  except Exception as e:
265
280
  logger.error(f"Error uploading screenshot: {e}")
266
281
  self._record_action(
@@ -16,6 +16,7 @@ from oagi.constants import (
16
16
  DEFAULT_TEMPERATURE,
17
17
  MODEL_ACTOR,
18
18
  )
19
+ from oagi.handler import reset_handler
19
20
  from oagi.types import AsyncActionHandler, AsyncImageProvider, AsyncObserver, SplitEvent
20
21
 
21
22
  from ..protocol import AsyncAgent
@@ -112,6 +113,9 @@ class TaskerAgent(AsyncAgent):
112
113
  Returns:
113
114
  True if all todos completed successfully, False otherwise
114
115
  """
116
+ # Reset handler state at automation start
117
+ reset_handler(action_handler)
118
+
115
119
  overall_success = True
116
120
 
117
121
  # Execute todos until none remain
@@ -17,12 +17,9 @@ from oagi.agent.observer import AsyncAgentObserver
17
17
  from oagi.constants import (
18
18
  API_KEY_HELP_URL,
19
19
  DEFAULT_BASE_URL,
20
- DEFAULT_MAX_STEPS,
21
20
  DEFAULT_MAX_STEPS_THINKER,
22
21
  DEFAULT_STEP_DELAY,
23
- DEFAULT_TEMPERATURE,
24
22
  MODE_ACTOR,
25
- MODEL_ACTOR,
26
23
  MODEL_THINKER,
27
24
  )
28
25
  from oagi.exceptions import check_optional_dependency
@@ -40,22 +37,30 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
40
37
  "run", help="Run an agent with the given instruction"
41
38
  )
42
39
  run_parser.add_argument(
43
- "instruction", type=str, help="Task instruction for the agent to execute"
40
+ "instruction",
41
+ type=str,
42
+ nargs="?",
43
+ default="",
44
+ help="Task instruction for the agent to execute (optional for pre-configured modes)",
44
45
  )
45
46
  run_parser.add_argument(
46
- "--model", type=str, help=f"Model to use (default: {MODEL_ACTOR})"
47
+ "--model", type=str, help="Model to use (default: determined by mode)"
47
48
  )
48
49
  run_parser.add_argument(
49
- "--max-steps", type=int, help="Maximum number of steps (default: 20)"
50
+ "--max-steps",
51
+ type=int,
52
+ help="Maximum number of steps (default: determined by mode)",
50
53
  )
51
54
  run_parser.add_argument(
52
- "--temperature", type=float, help="Sampling temperature (default: 0.5)"
55
+ "--temperature",
56
+ type=float,
57
+ help="Sampling temperature (default: determined by mode)",
53
58
  )
54
59
  run_parser.add_argument(
55
60
  "--mode",
56
61
  type=str,
57
62
  default=MODE_ACTOR,
58
- help=f"Agent mode to use (default: {MODE_ACTOR}). Available modes: actor, planner",
63
+ help=f"Agent mode to use (default: {MODE_ACTOR}). Use 'oagi agent modes' to list available modes",
59
64
  )
60
65
  run_parser.add_argument(
61
66
  "--oagi-api-key", type=str, help="OAGI API key (default: OAGI_API_KEY env var)"
@@ -82,6 +87,9 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
82
87
  help=f"Delay in seconds after each step before next screenshot (default: {DEFAULT_STEP_DELAY})",
83
88
  )
84
89
 
90
+ # agent modes command
91
+ agent_subparsers.add_parser("modes", help="List available agent modes")
92
+
85
93
  # agent permission command
86
94
  agent_subparsers.add_parser(
87
95
  "permission",
@@ -92,10 +100,22 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
92
100
  def handle_agent_command(args: argparse.Namespace) -> None:
93
101
  if args.agent_command == "run":
94
102
  run_agent(args)
103
+ elif args.agent_command == "modes":
104
+ list_modes()
95
105
  elif args.agent_command == "permission":
96
106
  check_permissions()
97
107
 
98
108
 
109
+ def list_modes() -> None:
110
+ """List all available agent modes."""
111
+ from oagi.agent import list_agent_modes # noqa: PLC0415
112
+
113
+ modes = list_agent_modes()
114
+ print("Available agent modes:")
115
+ for mode in modes:
116
+ print(f" - {mode}")
117
+
118
+
99
119
  def check_permissions() -> None:
100
120
  """Check and request macOS permissions for screen recording and accessibility.
101
121
 
@@ -207,14 +227,6 @@ def run_agent(args: argparse.Namespace) -> None:
207
227
  sys.exit(1)
208
228
 
209
229
  base_url = args.oagi_base_url or os.getenv("OAGI_BASE_URL", DEFAULT_BASE_URL)
210
- model = args.model or MODEL_ACTOR
211
- default_max_steps = (
212
- DEFAULT_MAX_STEPS_THINKER if model == MODEL_THINKER else DEFAULT_MAX_STEPS
213
- )
214
- max_steps = args.max_steps or default_max_steps
215
- temperature = (
216
- args.temperature if args.temperature is not None else DEFAULT_TEMPERATURE
217
- )
218
230
  mode = args.mode or MODE_ACTOR
219
231
  step_delay = args.step_delay if args.step_delay is not None else DEFAULT_STEP_DELAY
220
232
  export_format = args.export
@@ -233,26 +245,38 @@ def run_agent(args: argparse.Namespace) -> None:
233
245
 
234
246
  observer = CombinedObserver()
235
247
 
236
- # Create agent with observer
237
- agent = create_agent(
238
- mode=mode,
239
- api_key=api_key,
240
- base_url=base_url,
241
- model=model,
242
- max_steps=max_steps,
243
- temperature=temperature,
244
- step_observer=observer,
245
- step_delay=step_delay,
246
- )
248
+ # Build agent kwargs - only pass explicitly provided values, let factory use defaults
249
+ agent_kwargs = {
250
+ "mode": mode,
251
+ "api_key": api_key,
252
+ "base_url": base_url,
253
+ "step_observer": observer,
254
+ "step_delay": step_delay,
255
+ }
256
+ if args.model:
257
+ agent_kwargs["model"] = args.model
258
+ # If thinker model specified without max_steps, use thinker's default
259
+ if args.model == MODEL_THINKER and not args.max_steps:
260
+ agent_kwargs["max_steps"] = DEFAULT_MAX_STEPS_THINKER
261
+ if args.max_steps:
262
+ agent_kwargs["max_steps"] = args.max_steps
263
+ if args.temperature is not None:
264
+ agent_kwargs["temperature"] = args.temperature
265
+
266
+ # Create agent
267
+ agent = create_agent(**agent_kwargs)
247
268
 
248
269
  # Create handlers
249
270
  action_handler = AsyncPyautoguiActionHandler()
250
271
  image_provider = AsyncScreenshotMaker()
251
272
 
252
- print(f"Starting agent with instruction: {args.instruction}")
273
+ if args.instruction:
274
+ print(f"Starting agent with instruction: {args.instruction}")
275
+ else:
276
+ print(f"Starting agent with mode: {mode} (using pre-configured instruction)")
253
277
  print(
254
- f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, "
255
- f"Temperature: {temperature}, Step delay: {step_delay}s"
278
+ f"Mode: {mode}, Model: {agent.model}, Max steps: {agent.max_steps}, "
279
+ f"Temperature: {agent.temperature}, Step delay: {step_delay}s"
256
280
  )
257
281
  print("-" * 60)
258
282
 
@@ -14,6 +14,21 @@ from oagi.handler.pyautogui_action_handler import (
14
14
  )
15
15
  from oagi.handler.screenshot_maker import ScreenshotMaker
16
16
 
17
+
18
+ def reset_handler(handler) -> None:
19
+ """Reset handler state if supported.
20
+
21
+ Uses duck-typing to check if the handler has a reset() method.
22
+ This allows handlers to reset their internal state (e.g., capslock state)
23
+ at the start of a new automation task.
24
+
25
+ Args:
26
+ handler: The action handler to reset
27
+ """
28
+ if hasattr(handler, "reset"):
29
+ handler.reset()
30
+
31
+
17
32
  __all__ = [
18
33
  "PILImage",
19
34
  "PyautoguiActionHandler",
@@ -21,4 +36,5 @@ __all__ = [
21
36
  "AsyncPyautoguiActionHandler",
22
37
  "ScreenshotMaker",
23
38
  "AsyncScreenshotMaker",
39
+ "reset_handler",
24
40
  ]