code-puppy 0.0.214__py3-none-any.whl → 0.0.366__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. code_puppy/__init__.py +7 -1
  2. code_puppy/agents/__init__.py +2 -0
  3. code_puppy/agents/agent_c_reviewer.py +59 -6
  4. code_puppy/agents/agent_code_puppy.py +7 -1
  5. code_puppy/agents/agent_code_reviewer.py +12 -2
  6. code_puppy/agents/agent_cpp_reviewer.py +73 -6
  7. code_puppy/agents/agent_creator_agent.py +45 -4
  8. code_puppy/agents/agent_golang_reviewer.py +92 -3
  9. code_puppy/agents/agent_javascript_reviewer.py +101 -8
  10. code_puppy/agents/agent_manager.py +81 -4
  11. code_puppy/agents/agent_pack_leader.py +383 -0
  12. code_puppy/agents/agent_planning.py +163 -0
  13. code_puppy/agents/agent_python_programmer.py +165 -0
  14. code_puppy/agents/agent_python_reviewer.py +28 -6
  15. code_puppy/agents/agent_qa_expert.py +98 -6
  16. code_puppy/agents/agent_qa_kitten.py +12 -7
  17. code_puppy/agents/agent_security_auditor.py +113 -3
  18. code_puppy/agents/agent_terminal_qa.py +323 -0
  19. code_puppy/agents/agent_typescript_reviewer.py +106 -7
  20. code_puppy/agents/base_agent.py +802 -176
  21. code_puppy/agents/event_stream_handler.py +350 -0
  22. code_puppy/agents/pack/__init__.py +34 -0
  23. code_puppy/agents/pack/bloodhound.py +304 -0
  24. code_puppy/agents/pack/husky.py +321 -0
  25. code_puppy/agents/pack/retriever.py +393 -0
  26. code_puppy/agents/pack/shepherd.py +348 -0
  27. code_puppy/agents/pack/terrier.py +287 -0
  28. code_puppy/agents/pack/watchdog.py +367 -0
  29. code_puppy/agents/prompt_reviewer.py +145 -0
  30. code_puppy/agents/subagent_stream_handler.py +276 -0
  31. code_puppy/api/__init__.py +13 -0
  32. code_puppy/api/app.py +169 -0
  33. code_puppy/api/main.py +21 -0
  34. code_puppy/api/pty_manager.py +446 -0
  35. code_puppy/api/routers/__init__.py +12 -0
  36. code_puppy/api/routers/agents.py +36 -0
  37. code_puppy/api/routers/commands.py +217 -0
  38. code_puppy/api/routers/config.py +74 -0
  39. code_puppy/api/routers/sessions.py +232 -0
  40. code_puppy/api/templates/terminal.html +361 -0
  41. code_puppy/api/websocket.py +154 -0
  42. code_puppy/callbacks.py +142 -4
  43. code_puppy/chatgpt_codex_client.py +283 -0
  44. code_puppy/claude_cache_client.py +586 -0
  45. code_puppy/cli_runner.py +916 -0
  46. code_puppy/command_line/add_model_menu.py +1079 -0
  47. code_puppy/command_line/agent_menu.py +395 -0
  48. code_puppy/command_line/attachments.py +10 -5
  49. code_puppy/command_line/autosave_menu.py +605 -0
  50. code_puppy/command_line/clipboard.py +527 -0
  51. code_puppy/command_line/colors_menu.py +520 -0
  52. code_puppy/command_line/command_handler.py +176 -738
  53. code_puppy/command_line/command_registry.py +150 -0
  54. code_puppy/command_line/config_commands.py +715 -0
  55. code_puppy/command_line/core_commands.py +792 -0
  56. code_puppy/command_line/diff_menu.py +863 -0
  57. code_puppy/command_line/load_context_completion.py +15 -22
  58. code_puppy/command_line/mcp/base.py +0 -3
  59. code_puppy/command_line/mcp/catalog_server_installer.py +175 -0
  60. code_puppy/command_line/mcp/custom_server_form.py +688 -0
  61. code_puppy/command_line/mcp/custom_server_installer.py +195 -0
  62. code_puppy/command_line/mcp/edit_command.py +148 -0
  63. code_puppy/command_line/mcp/handler.py +9 -4
  64. code_puppy/command_line/mcp/help_command.py +6 -5
  65. code_puppy/command_line/mcp/install_command.py +15 -26
  66. code_puppy/command_line/mcp/install_menu.py +685 -0
  67. code_puppy/command_line/mcp/list_command.py +2 -2
  68. code_puppy/command_line/mcp/logs_command.py +174 -65
  69. code_puppy/command_line/mcp/remove_command.py +2 -2
  70. code_puppy/command_line/mcp/restart_command.py +12 -4
  71. code_puppy/command_line/mcp/search_command.py +16 -10
  72. code_puppy/command_line/mcp/start_all_command.py +18 -6
  73. code_puppy/command_line/mcp/start_command.py +47 -25
  74. code_puppy/command_line/mcp/status_command.py +4 -5
  75. code_puppy/command_line/mcp/stop_all_command.py +7 -1
  76. code_puppy/command_line/mcp/stop_command.py +8 -4
  77. code_puppy/command_line/mcp/test_command.py +2 -2
  78. code_puppy/command_line/mcp/wizard_utils.py +20 -16
  79. code_puppy/command_line/mcp_completion.py +174 -0
  80. code_puppy/command_line/model_picker_completion.py +75 -25
  81. code_puppy/command_line/model_settings_menu.py +884 -0
  82. code_puppy/command_line/motd.py +14 -8
  83. code_puppy/command_line/onboarding_slides.py +179 -0
  84. code_puppy/command_line/onboarding_wizard.py +340 -0
  85. code_puppy/command_line/pin_command_completion.py +329 -0
  86. code_puppy/command_line/prompt_toolkit_completion.py +463 -63
  87. code_puppy/command_line/session_commands.py +296 -0
  88. code_puppy/command_line/utils.py +54 -0
  89. code_puppy/config.py +898 -112
  90. code_puppy/error_logging.py +118 -0
  91. code_puppy/gemini_code_assist.py +385 -0
  92. code_puppy/gemini_model.py +602 -0
  93. code_puppy/http_utils.py +210 -148
  94. code_puppy/keymap.py +128 -0
  95. code_puppy/main.py +5 -698
  96. code_puppy/mcp_/__init__.py +17 -0
  97. code_puppy/mcp_/async_lifecycle.py +35 -4
  98. code_puppy/mcp_/blocking_startup.py +70 -43
  99. code_puppy/mcp_/captured_stdio_server.py +2 -2
  100. code_puppy/mcp_/config_wizard.py +4 -4
  101. code_puppy/mcp_/dashboard.py +15 -6
  102. code_puppy/mcp_/managed_server.py +65 -38
  103. code_puppy/mcp_/manager.py +146 -52
  104. code_puppy/mcp_/mcp_logs.py +224 -0
  105. code_puppy/mcp_/registry.py +6 -6
  106. code_puppy/mcp_/server_registry_catalog.py +24 -5
  107. code_puppy/messaging/__init__.py +199 -2
  108. code_puppy/messaging/bus.py +610 -0
  109. code_puppy/messaging/commands.py +167 -0
  110. code_puppy/messaging/markdown_patches.py +57 -0
  111. code_puppy/messaging/message_queue.py +17 -48
  112. code_puppy/messaging/messages.py +500 -0
  113. code_puppy/messaging/queue_console.py +1 -24
  114. code_puppy/messaging/renderers.py +43 -146
  115. code_puppy/messaging/rich_renderer.py +1027 -0
  116. code_puppy/messaging/spinner/__init__.py +21 -5
  117. code_puppy/messaging/spinner/console_spinner.py +86 -51
  118. code_puppy/messaging/subagent_console.py +461 -0
  119. code_puppy/model_factory.py +634 -83
  120. code_puppy/model_utils.py +167 -0
  121. code_puppy/models.json +66 -68
  122. code_puppy/models_dev_api.json +1 -0
  123. code_puppy/models_dev_parser.py +592 -0
  124. code_puppy/plugins/__init__.py +164 -10
  125. code_puppy/plugins/antigravity_oauth/__init__.py +10 -0
  126. code_puppy/plugins/antigravity_oauth/accounts.py +406 -0
  127. code_puppy/plugins/antigravity_oauth/antigravity_model.py +704 -0
  128. code_puppy/plugins/antigravity_oauth/config.py +42 -0
  129. code_puppy/plugins/antigravity_oauth/constants.py +136 -0
  130. code_puppy/plugins/antigravity_oauth/oauth.py +478 -0
  131. code_puppy/plugins/antigravity_oauth/register_callbacks.py +406 -0
  132. code_puppy/plugins/antigravity_oauth/storage.py +271 -0
  133. code_puppy/plugins/antigravity_oauth/test_plugin.py +319 -0
  134. code_puppy/plugins/antigravity_oauth/token.py +167 -0
  135. code_puppy/plugins/antigravity_oauth/transport.py +767 -0
  136. code_puppy/plugins/antigravity_oauth/utils.py +169 -0
  137. code_puppy/plugins/chatgpt_oauth/__init__.py +8 -0
  138. code_puppy/plugins/chatgpt_oauth/config.py +52 -0
  139. code_puppy/plugins/chatgpt_oauth/oauth_flow.py +328 -0
  140. code_puppy/plugins/chatgpt_oauth/register_callbacks.py +94 -0
  141. code_puppy/plugins/chatgpt_oauth/test_plugin.py +293 -0
  142. code_puppy/plugins/chatgpt_oauth/utils.py +489 -0
  143. code_puppy/plugins/claude_code_oauth/README.md +167 -0
  144. code_puppy/plugins/claude_code_oauth/SETUP.md +93 -0
  145. code_puppy/plugins/claude_code_oauth/__init__.py +6 -0
  146. code_puppy/plugins/claude_code_oauth/config.py +50 -0
  147. code_puppy/plugins/claude_code_oauth/register_callbacks.py +308 -0
  148. code_puppy/plugins/claude_code_oauth/test_plugin.py +283 -0
  149. code_puppy/plugins/claude_code_oauth/utils.py +518 -0
  150. code_puppy/plugins/customizable_commands/__init__.py +0 -0
  151. code_puppy/plugins/customizable_commands/register_callbacks.py +169 -0
  152. code_puppy/plugins/example_custom_command/README.md +280 -0
  153. code_puppy/plugins/example_custom_command/register_callbacks.py +2 -2
  154. code_puppy/plugins/file_permission_handler/__init__.py +4 -0
  155. code_puppy/plugins/file_permission_handler/register_callbacks.py +523 -0
  156. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  157. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  158. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  159. code_puppy/plugins/oauth_puppy_html.py +228 -0
  160. code_puppy/plugins/shell_safety/__init__.py +6 -0
  161. code_puppy/plugins/shell_safety/agent_shell_safety.py +69 -0
  162. code_puppy/plugins/shell_safety/command_cache.py +156 -0
  163. code_puppy/plugins/shell_safety/register_callbacks.py +202 -0
  164. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  165. code_puppy/prompts/codex_system_prompt.md +310 -0
  166. code_puppy/pydantic_patches.py +131 -0
  167. code_puppy/reopenable_async_client.py +8 -8
  168. code_puppy/round_robin_model.py +9 -12
  169. code_puppy/session_storage.py +2 -1
  170. code_puppy/status_display.py +21 -4
  171. code_puppy/summarization_agent.py +41 -13
  172. code_puppy/terminal_utils.py +418 -0
  173. code_puppy/tools/__init__.py +37 -1
  174. code_puppy/tools/agent_tools.py +536 -52
  175. code_puppy/tools/browser/__init__.py +37 -0
  176. code_puppy/tools/browser/browser_control.py +19 -23
  177. code_puppy/tools/browser/browser_interactions.py +41 -48
  178. code_puppy/tools/browser/browser_locators.py +36 -38
  179. code_puppy/tools/browser/browser_manager.py +316 -0
  180. code_puppy/tools/browser/browser_navigation.py +16 -16
  181. code_puppy/tools/browser/browser_screenshot.py +79 -143
  182. code_puppy/tools/browser/browser_scripts.py +32 -42
  183. code_puppy/tools/browser/browser_workflows.py +44 -27
  184. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  185. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  186. code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
  187. code_puppy/tools/browser/terminal_tools.py +525 -0
  188. code_puppy/tools/command_runner.py +930 -147
  189. code_puppy/tools/common.py +1113 -5
  190. code_puppy/tools/display.py +84 -0
  191. code_puppy/tools/file_modifications.py +288 -89
  192. code_puppy/tools/file_operations.py +226 -154
  193. code_puppy/tools/subagent_context.py +158 -0
  194. code_puppy/uvx_detection.py +242 -0
  195. code_puppy/version_checker.py +30 -11
  196. code_puppy-0.0.366.data/data/code_puppy/models.json +110 -0
  197. code_puppy-0.0.366.data/data/code_puppy/models_dev_api.json +1 -0
  198. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/METADATA +149 -75
  199. code_puppy-0.0.366.dist-info/RECORD +217 -0
  200. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/WHEEL +1 -1
  201. code_puppy/command_line/mcp/add_command.py +0 -183
  202. code_puppy/messaging/spinner/textual_spinner.py +0 -106
  203. code_puppy/tools/browser/camoufox_manager.py +0 -216
  204. code_puppy/tools/browser/vqa_agent.py +0 -70
  205. code_puppy/tui/__init__.py +0 -10
  206. code_puppy/tui/app.py +0 -1105
  207. code_puppy/tui/components/__init__.py +0 -21
  208. code_puppy/tui/components/chat_view.py +0 -551
  209. code_puppy/tui/components/command_history_modal.py +0 -218
  210. code_puppy/tui/components/copy_button.py +0 -139
  211. code_puppy/tui/components/custom_widgets.py +0 -63
  212. code_puppy/tui/components/human_input_modal.py +0 -175
  213. code_puppy/tui/components/input_area.py +0 -167
  214. code_puppy/tui/components/sidebar.py +0 -309
  215. code_puppy/tui/components/status_bar.py +0 -185
  216. code_puppy/tui/messages.py +0 -27
  217. code_puppy/tui/models/__init__.py +0 -8
  218. code_puppy/tui/models/chat_message.py +0 -25
  219. code_puppy/tui/models/command_history.py +0 -89
  220. code_puppy/tui/models/enums.py +0 -24
  221. code_puppy/tui/screens/__init__.py +0 -17
  222. code_puppy/tui/screens/autosave_picker.py +0 -175
  223. code_puppy/tui/screens/help.py +0 -130
  224. code_puppy/tui/screens/mcp_install_wizard.py +0 -803
  225. code_puppy/tui/screens/settings.py +0 -306
  226. code_puppy/tui/screens/tools.py +0 -74
  227. code_puppy/tui_state.py +0 -55
  228. code_puppy-0.0.214.data/data/code_puppy/models.json +0 -112
  229. code_puppy-0.0.214.dist-info/RECORD +0 -131
  230. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/entry_points.txt +0 -0
  231. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/licenses/LICENSE +0 -0
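File (inferred from the file list, entry 181 — confirm): code_puppy/tools/browser/browser_screenshot.py
@@ -1,19 +1,21 @@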
1
- """Screenshot and visual analysis tool with VQA capabilities."""
1
+ """Screenshot tool for browser automation.
2
2
 
3
- import asyncio
3
+ Captures screenshots and returns them via ToolReturn with BinaryContent
4
+ so multimodal models can directly see and analyze - no separate VQA agent needed.
5
+ """
6
+
7
+ import time
4
8
  from datetime import datetime
5
9
  from pathlib import Path
6
10
  from tempfile import gettempdir, mkdtemp
7
- from typing import Any, Dict, Optional
11
+ from typing import Any, Dict, Optional, Union
8
12
 
9
- from pydantic import BaseModel
10
- from pydantic_ai import RunContext
13
+ from pydantic_ai import BinaryContent, RunContext, ToolReturn
11
14
 
12
- from code_puppy.messaging import emit_error, emit_info
15
+ from code_puppy.messaging import emit_error, emit_info, emit_success
13
16
  from code_puppy.tools.common import generate_group_id
14
17
 
15
- from .camoufox_manager import get_camoufox_manager
16
- from .vqa_agent import run_vqa_analysis
18
+ from .browser_manager import get_session_browser_manager
17
19
 
18
20
  _TEMP_SCREENSHOT_ROOT = Path(
19
21
  mkdtemp(prefix="code_puppy_screenshots_", dir=gettempdir())
@@ -21,21 +23,11 @@ _TEMP_SCREENSHOT_ROOT = Path(
21
23
 
22
24
 
23
25
  def _build_screenshot_path(timestamp: str) -> Path:
24
- """Return the target path for a screenshot using a shared temp directory."""
26
+ """Return the target path for a screenshot."""
25
27
  filename = f"screenshot_{timestamp}.png"
26
28
  return _TEMP_SCREENSHOT_ROOT / filename
27
29
 
28
30
 
29
- class ScreenshotResult(BaseModel):
30
- """Result from screenshot operation."""
31
-
32
- success: bool
33
- screenshot_path: Optional[str] = None
34
- screenshot_data: Optional[bytes] = None
35
- timestamp: Optional[str] = None
36
- error: Optional[str] = None
37
-
38
-
39
31
  async def _capture_screenshot(
40
32
  page,
41
33
  full_page: bool = False,
@@ -45,41 +37,37 @@ async def _capture_screenshot(
45
37
  ) -> Dict[str, Any]:
46
38
  """Internal screenshot capture function."""
47
39
  try:
48
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
40
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
49
41
 
50
42
  # Take screenshot
51
43
  if element_selector:
52
- # Screenshot specific element
53
44
  element = await page.locator(element_selector).first
54
45
  if not await element.is_visible():
55
46
  return {
56
47
  "success": False,
57
48
  "error": f"Element '{element_selector}' is not visible",
58
49
  }
59
- screenshot_data = await element.screenshot()
50
+ screenshot_bytes = await element.screenshot()
60
51
  else:
61
- # Screenshot page or full page
62
- screenshot_data = await page.screenshot(full_page=full_page)
52
+ screenshot_bytes = await page.screenshot(full_page=full_page)
63
53
 
64
- result = {
54
+ result: Dict[str, Any] = {
65
55
  "success": True,
66
- "screenshot_data": screenshot_data,
56
+ "screenshot_bytes": screenshot_bytes,
67
57
  "timestamp": timestamp,
68
58
  }
69
59
 
70
60
  if save_screenshot:
71
61
  screenshot_path = _build_screenshot_path(timestamp)
72
62
  screenshot_path.parent.mkdir(parents=True, exist_ok=True)
73
-
74
63
  with open(screenshot_path, "wb") as f:
75
- f.write(screenshot_data)
76
-
64
+ f.write(screenshot_bytes)
77
65
  result["screenshot_path"] = str(screenshot_path)
78
- message = f"[green]Screenshot saved: {screenshot_path}[/green]"
66
+
79
67
  if group_id:
80
- emit_info(message, message_group=group_id)
81
- else:
82
- emit_info(message)
68
+ emit_success(
69
+ f"Screenshot saved: {screenshot_path}", message_group=group_id
70
+ )
83
71
 
84
72
  return result
85
73
 
@@ -87,46 +75,42 @@ async def _capture_screenshot(
87
75
  return {"success": False, "error": str(e)}
88
76
 
89
77
 
90
- async def take_screenshot_and_analyze(
91
- question: str,
78
+ async def take_screenshot(
92
79
  full_page: bool = False,
93
80
  element_selector: Optional[str] = None,
94
81
  save_screenshot: bool = True,
95
- ) -> Dict[str, Any]:
96
- """
97
- Take a screenshot and analyze it using visual understanding.
82
+ ) -> Union[ToolReturn, Dict[str, Any]]:
83
+ """Take a screenshot of the browser page.
84
+
85
+ Returns a ToolReturn with BinaryContent so multimodal models can
86
+ directly see and analyze the screenshot.
98
87
 
99
88
  Args:
100
- question: The specific question to ask about the screenshot
101
- full_page: Whether to capture the full page or just viewport
102
- element_selector: Optional selector to screenshot just a specific element
103
- save_screenshot: Whether to save the screenshot to disk
89
+ full_page: Whether to capture full page or just viewport.
90
+ element_selector: Optional selector to screenshot specific element.
91
+ save_screenshot: Whether to save the screenshot to disk.
104
92
 
105
93
  Returns:
106
- Dict containing analysis results and screenshot info
94
+ ToolReturn containing:
95
+ - return_value: Success message with screenshot path
96
+ - content: List with description and BinaryContent image
97
+ - metadata: Screenshot details (path, target, timestamp)
98
+ Or Dict with error info if failed.
107
99
  """
108
100
  target = element_selector or ("full_page" if full_page else "viewport")
109
- group_id = generate_group_id(
110
- "browser_screenshot_analyze", f"{question[:50]}_{target}"
111
- )
112
- emit_info(
113
- f"[bold white on blue] BROWSER SCREENSHOT ANALYZE [/bold white on blue] 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
114
- message_group=group_id,
115
- )
101
+ group_id = generate_group_id("browser_screenshot", target)
102
+ emit_info(f"BROWSER SCREENSHOT 📷 target={target}", message_group=group_id)
103
+
116
104
  try:
117
- # Get the current browser page
118
- browser_manager = get_camoufox_manager()
105
+ browser_manager = get_session_browser_manager()
119
106
  page = await browser_manager.get_current_page()
120
107
 
121
108
  if not page:
122
- return {
123
- "success": False,
124
- "error": "No active browser page available. Please navigate to a webpage first.",
125
- "question": question,
126
- }
109
+ error_msg = "No active browser page. Navigate to a webpage first."
110
+ emit_error(error_msg, message_group=group_id)
111
+ return {"success": False, "error": error_msg}
127
112
 
128
- # Take screenshot
129
- screenshot_result = await _capture_screenshot(
113
+ result = await _capture_screenshot(
130
114
  page,
131
115
  full_page=full_page,
132
116
  element_selector=element_selector,
@@ -134,110 +118,62 @@ async def take_screenshot_and_analyze(
134
118
  group_id=group_id,
135
119
  )
136
120
 
137
- if not screenshot_result["success"]:
138
- error_message = screenshot_result.get("error", "Screenshot failed")
139
- emit_error(
140
- f"[red]Screenshot capture failed: {error_message}[/red]",
141
- message_group=group_id,
142
- )
143
- return {
144
- "success": False,
145
- "error": error_message,
146
- "question": question,
147
- }
148
-
149
- screenshot_bytes = screenshot_result.get("screenshot_data")
150
- if not screenshot_bytes:
151
- emit_error(
152
- "[red]Screenshot captured but pixel data missing; cannot run visual analysis.[/red]",
153
- message_group=group_id,
154
- )
155
- return {
156
- "success": False,
157
- "error": "Screenshot captured but no image bytes available for analysis.",
158
- "question": question,
159
- }
160
-
161
- try:
162
- vqa_result = await asyncio.to_thread(
163
- run_vqa_analysis,
164
- question,
165
- screenshot_bytes,
166
- )
167
- except Exception as exc:
168
- emit_error(
169
- f"[red]Visual question answering failed: {exc}[/red]",
170
- message_group=group_id,
171
- )
172
- return {
173
- "success": False,
174
- "error": f"Visual analysis failed: {exc}",
175
- "question": question,
176
- "screenshot_info": {
177
- "path": screenshot_result.get("screenshot_path"),
178
- "timestamp": screenshot_result.get("timestamp"),
179
- "full_page": full_page,
180
- "element_selector": element_selector,
181
- },
182
- }
183
-
184
- emit_info(
185
- f"[green]Visual analysis answer: {vqa_result.answer}[/green]",
186
- message_group=group_id,
187
- )
188
- emit_info(
189
- f"[dim]Observations: {vqa_result.observations}[/dim]",
190
- message_group=group_id,
191
- )
192
-
193
- return {
194
- "success": True,
195
- "question": question,
196
- "answer": vqa_result.answer,
197
- "confidence": vqa_result.confidence,
198
- "observations": vqa_result.observations,
199
- "screenshot_info": {
200
- "path": screenshot_result.get("screenshot_path"),
201
- "size": len(screenshot_bytes),
202
- "timestamp": screenshot_result.get("timestamp"),
121
+ if not result["success"]:
122
+ emit_error(result.get("error", "Screenshot failed"), message_group=group_id)
123
+ return {"success": False, "error": result.get("error")}
124
+
125
+ screenshot_path = result.get("screenshot_path", "(not saved)")
126
+
127
+ # Return as ToolReturn with BinaryContent so the model can SEE the image!
128
+ return ToolReturn(
129
+ return_value=f"Screenshot captured successfully. Saved to: {screenshot_path}",
130
+ content=[
131
+ f"Here's the browser screenshot ({target}):",
132
+ BinaryContent(
133
+ data=result["screenshot_bytes"],
134
+ media_type="image/png",
135
+ ),
136
+ "Please analyze what you see and describe any relevant details.",
137
+ ],
138
+ metadata={
139
+ "success": True,
140
+ "screenshot_path": screenshot_path,
141
+ "target": target,
203
142
  "full_page": full_page,
204
143
  "element_selector": element_selector,
144
+ "timestamp": time.time(),
205
145
  },
206
- }
146
+ )
207
147
 
208
148
  except Exception as e:
209
- emit_info(
210
- f"[red]Screenshot analysis failed: {str(e)}[/red]", message_group=group_id
211
- )
212
- return {"success": False, "error": str(e), "question": question}
149
+ error_msg = f"Screenshot failed: {str(e)}"
150
+ emit_error(error_msg, message_group=group_id)
151
+ return {"success": False, "error": error_msg}
213
152
 
214
153
 
215
154
  def register_take_screenshot_and_analyze(agent):
216
- """Register the screenshot analysis tool."""
155
+ """Register the screenshot tool."""
217
156
 
218
157
  @agent.tool
219
158
  async def browser_screenshot_analyze(
220
159
  context: RunContext,
221
- question: str,
222
160
  full_page: bool = False,
223
161
  element_selector: Optional[str] = None,
224
- save_screenshot: bool = True,
225
- ) -> Dict[str, Any]:
162
+ ) -> Union[ToolReturn, Dict[str, Any]]:
226
163
  """
227
- Take a screenshot and analyze it to answer a specific question.
164
+ Take a screenshot of the browser page.
165
+
166
+ Returns the screenshot via ToolReturn with BinaryContent that you can
167
+ see directly. Use this to see what's displayed in the browser.
228
168
 
229
169
  Args:
230
- question: The specific question to ask about the screenshot
231
- full_page: Whether to capture the full page or just viewport
232
- element_selector: Optional CSS/XPath selector to screenshot specific element
233
- save_screenshot: Whether to save the screenshot to disk
170
+ full_page: Capture full page (True) or just viewport (False).
171
+ element_selector: Optional CSS selector to screenshot specific element.
234
172
 
235
173
  Returns:
236
- Dict with analysis results including answer, confidence, and observations
174
+ ToolReturn with the screenshot image you can analyze, or error dict.
237
175
  """
238
- return await take_screenshot_and_analyze(
239
- question=question,
176
+ return await take_screenshot(
240
177
  full_page=full_page,
241
178
  element_selector=element_selector,
242
- save_screenshot=save_screenshot,
243
179
  )
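File (inferred from the file list, entry 182 — confirm): code_puppy/tools/browser/browser_scripts.py
@@ -4,10 +4,10 @@ from typing import Any, Dict, Optional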
4
4
 
5
5
  from pydantic_ai import RunContext
6
6
 
7
- from code_puppy.messaging import emit_info
7
+ from code_puppy.messaging import emit_error, emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .browser_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def execute_javascript(
@@ -17,29 +17,27 @@ async def execute_javascript(
17
17
  """Execute JavaScript code in the browser context."""
18
18
  group_id = generate_group_id("browser_execute_js", script[:100])
19
19
  emit_info(
20
- f"[bold white on blue] BROWSER EXECUTE JS [/bold white on blue] 📜 script='{script[:100]}{'...' if len(script) > 100 else ''}'",
20
+ f"BROWSER EXECUTE JS 📜 script='{script[:100]}{'...' if len(script) > 100 else ''}'",
21
21
  message_group=group_id,
22
22
  )
23
23
  try:
24
- browser_manager = get_camoufox_manager()
24
+ browser_manager = get_session_browser_manager()
25
25
  page = await browser_manager.get_current_page()
26
26
 
27
27
  if not page:
28
28
  return {"success": False, "error": "No active browser page available"}
29
29
 
30
30
  # Execute JavaScript
31
- result = await page.evaluate(script, timeout=timeout)
31
+ # Note: page.evaluate() does NOT accept a timeout parameter
32
+ # The timeout arg to this function is kept for API compatibility but unused
33
+ result = await page.evaluate(script)
32
34
 
33
- emit_info(
34
- "[green]JavaScript executed successfully[/green]", message_group=group_id
35
- )
35
+ emit_success("JavaScript executed successfully", message_group=group_id)
36
36
 
37
37
  return {"success": True, "script": script, "result": result}
38
38
 
39
39
  except Exception as e:
40
- emit_info(
41
- f"[red]JavaScript execution failed: {str(e)}[/red]", message_group=group_id
42
- )
40
+ emit_error(f"JavaScript execution failed: {str(e)}", message_group=group_id)
43
41
  return {"success": False, "error": str(e), "script": script}
44
42
 
45
43
 
@@ -52,11 +50,11 @@ async def scroll_page(
52
50
  target = element_selector or "page"
53
51
  group_id = generate_group_id("browser_scroll", f"{direction}_{amount}_{target}")
54
52
  emit_info(
55
- f"[bold white on blue] BROWSER SCROLL [/bold white on blue] 📋 direction={direction} amount={amount} target='{target}'",
53
+ f"BROWSER SCROLL 📋 direction={direction} amount={amount} target='{target}'",
56
54
  message_group=group_id,
57
55
  )
58
56
  try:
59
- browser_manager = get_camoufox_manager()
57
+ browser_manager = get_session_browser_manager()
60
58
  page = await browser_manager.get_current_page()
61
59
 
62
60
  if not page:
@@ -64,7 +62,7 @@ async def scroll_page(
64
62
 
65
63
  if element_selector:
66
64
  # Scroll specific element
67
- element = page.locator(element_selector)
65
+ element = page.locator(element_selector).first
68
66
  await element.scroll_into_view_if_needed()
69
67
 
70
68
  # Get element's current scroll position and dimensions
@@ -120,9 +118,7 @@ async def scroll_page(
120
118
  })
121
119
  """)
122
120
 
123
- emit_info(
124
- f"[green]Scrolled {target} {direction}[/green]", message_group=group_id
125
- )
121
+ emit_success(f"Scrolled {target} {direction}", message_group=group_id)
126
122
 
127
123
  return {
128
124
  "success": True,
@@ -148,26 +144,24 @@ async def scroll_to_element(
148
144
  """Scroll to bring an element into view."""
149
145
  group_id = generate_group_id("browser_scroll_to_element", selector[:100])
150
146
  emit_info(
151
- f"[bold white on blue] BROWSER SCROLL TO ELEMENT [/bold white on blue] 🎯 selector='{selector}'",
147
+ f"BROWSER SCROLL TO ELEMENT 🎯 selector='{selector}'",
152
148
  message_group=group_id,
153
149
  )
154
150
  try:
155
- browser_manager = get_camoufox_manager()
151
+ browser_manager = get_session_browser_manager()
156
152
  page = await browser_manager.get_current_page()
157
153
 
158
154
  if not page:
159
155
  return {"success": False, "error": "No active browser page available"}
160
156
 
161
- element = page.locator(selector)
157
+ element = page.locator(selector).first
162
158
  await element.wait_for(state="attached", timeout=timeout)
163
159
  await element.scroll_into_view_if_needed()
164
160
 
165
161
  # Check if element is now visible
166
162
  is_visible = await element.is_visible()
167
163
 
168
- emit_info(
169
- f"[green]Scrolled to element: {selector}[/green]", message_group=group_id
170
- )
164
+ emit_success(f"Scrolled to element: {selector}", message_group=group_id)
171
165
 
172
166
  return {"success": True, "selector": selector, "visible": is_visible}
173
167
 
@@ -182,11 +176,11 @@ async def set_viewport_size(
182
176
  """Set the viewport size."""
183
177
  group_id = generate_group_id("browser_set_viewport", f"{width}x{height}")
184
178
  emit_info(
185
- f"[bold white on blue] BROWSER SET VIEWPORT [/bold white on blue] 🖥️ size={width}x{height}",
179
+ f"BROWSER SET VIEWPORT 🖥️ size={width}x{height}",
186
180
  message_group=group_id,
187
181
  )
188
182
  try:
189
- browser_manager = get_camoufox_manager()
183
+ browser_manager = get_session_browser_manager()
190
184
  page = await browser_manager.get_current_page()
191
185
 
192
186
  if not page:
@@ -194,8 +188,8 @@ async def set_viewport_size(
194
188
 
195
189
  await page.set_viewport_size({"width": width, "height": height})
196
190
 
197
- emit_info(
198
- f"[green]Set viewport size to {width}x{height}[/green]",
191
+ emit_success(
192
+ f"Set viewport size to {width}x{height}",
199
193
  message_group=group_id,
200
194
  )
201
195
 
@@ -213,22 +207,20 @@ async def wait_for_element(
213
207
  """Wait for an element to reach a specific state."""
214
208
  group_id = generate_group_id("browser_wait_for_element", f"{selector[:50]}_{state}")
215
209
  emit_info(
216
- f"[bold white on blue] BROWSER WAIT FOR ELEMENT [/bold white on blue] ⏱️ selector='{selector}' state={state} timeout={timeout}ms",
210
+ f"BROWSER WAIT FOR ELEMENT ⏱️ selector='{selector}' state={state} timeout={timeout}ms",
217
211
  message_group=group_id,
218
212
  )
219
213
  try:
220
- browser_manager = get_camoufox_manager()
214
+ browser_manager = get_session_browser_manager()
221
215
  page = await browser_manager.get_current_page()
222
216
 
223
217
  if not page:
224
218
  return {"success": False, "error": "No active browser page available"}
225
219
 
226
- element = page.locator(selector)
220
+ element = page.locator(selector).first
227
221
  await element.wait_for(state=state, timeout=timeout)
228
222
 
229
- emit_info(
230
- f"[green]Element {selector} is now {state}[/green]", message_group=group_id
231
- )
223
+ emit_success(f"Element {selector} is now {state}", message_group=group_id)
232
224
 
233
225
  return {"success": True, "selector": selector, "state": state}
234
226
 
@@ -246,17 +238,17 @@ async def highlight_element(
246
238
  "browser_highlight_element", f"{selector[:50]}_{color}"
247
239
  )
248
240
  emit_info(
249
- f"[bold white on blue] BROWSER HIGHLIGHT ELEMENT [/bold white on blue] 🔦 selector='{selector}' color={color}",
241
+ f"BROWSER HIGHLIGHT ELEMENT 🔦 selector='{selector}' color={color}",
250
242
  message_group=group_id,
251
243
  )
252
244
  try:
253
- browser_manager = get_camoufox_manager()
245
+ browser_manager = get_session_browser_manager()
254
246
  page = await browser_manager.get_current_page()
255
247
 
256
248
  if not page:
257
249
  return {"success": False, "error": "No active browser page available"}
258
250
 
259
- element = page.locator(selector)
251
+ element = page.locator(selector).first
260
252
  await element.wait_for(state="visible", timeout=timeout)
261
253
 
262
254
  # Add highlight style
@@ -271,9 +263,7 @@ async def highlight_element(
271
263
 
272
264
  await element.evaluate(highlight_script)
273
265
 
274
- emit_info(
275
- f"[green]Highlighted element: {selector}[/green]", message_group=group_id
276
- )
266
+ emit_success(f"Highlighted element: {selector}", message_group=group_id)
277
267
 
278
268
  return {"success": True, "selector": selector, "color": color}
279
269
 
@@ -285,11 +275,11 @@ async def clear_highlights() -> Dict[str, Any]:
285
275
  """Clear all element highlights."""
286
276
  group_id = generate_group_id("browser_clear_highlights")
287
277
  emit_info(
288
- "[bold white on blue] BROWSER CLEAR HIGHLIGHTS [/bold white on blue] 🧹",
278
+ "BROWSER CLEAR HIGHLIGHTS 🧹",
289
279
  message_group=group_id,
290
280
  )
291
281
  try:
292
- browser_manager = get_camoufox_manager()
282
+ browser_manager = get_session_browser_manager()
293
283
  page = await browser_manager.get_current_page()
294
284
 
295
285
  if not page:
@@ -311,7 +301,7 @@ async def clear_highlights() -> Dict[str, Any]:
311
301
 
312
302
  count = await page.evaluate(clear_script)
313
303
 
314
- emit_info(f"[green]Cleared {count} highlights[/green]", message_group=group_id)
304
+ emit_success(f"Cleared {count} highlights", message_group=group_id)
315
305
 
316
306
  return {"success": True, "cleared_count": count}
317
307