code-puppy 0.0.171__py3-none-any.whl → 0.0.173__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy/agent.py +8 -8
- code_puppy/agents/agent_creator_agent.py +0 -3
- code_puppy/agents/agent_qa_kitten.py +203 -0
- code_puppy/agents/base_agent.py +398 -2
- code_puppy/command_line/command_handler.py +68 -28
- code_puppy/command_line/mcp/add_command.py +2 -2
- code_puppy/command_line/mcp/base.py +1 -1
- code_puppy/command_line/mcp/install_command.py +2 -2
- code_puppy/command_line/mcp/list_command.py +1 -1
- code_puppy/command_line/mcp/search_command.py +1 -1
- code_puppy/command_line/mcp/start_all_command.py +1 -1
- code_puppy/command_line/mcp/status_command.py +2 -2
- code_puppy/command_line/mcp/stop_all_command.py +1 -1
- code_puppy/command_line/mcp/utils.py +1 -1
- code_puppy/command_line/mcp/wizard_utils.py +2 -2
- code_puppy/config.py +141 -12
- code_puppy/http_utils.py +50 -24
- code_puppy/main.py +2 -1
- code_puppy/{mcp → mcp_}/config_wizard.py +1 -1
- code_puppy/{mcp → mcp_}/examples/retry_example.py +1 -1
- code_puppy/{mcp → mcp_}/managed_server.py +1 -1
- code_puppy/{mcp → mcp_}/server_registry_catalog.py +1 -3
- code_puppy/message_history_processor.py +83 -221
- code_puppy/messaging/message_queue.py +4 -4
- code_puppy/state_management.py +1 -100
- code_puppy/tools/__init__.py +103 -6
- code_puppy/tools/browser/__init__.py +0 -0
- code_puppy/tools/browser/browser_control.py +293 -0
- code_puppy/tools/browser/browser_interactions.py +552 -0
- code_puppy/tools/browser/browser_locators.py +642 -0
- code_puppy/tools/browser/browser_navigation.py +251 -0
- code_puppy/tools/browser/browser_screenshot.py +242 -0
- code_puppy/tools/browser/browser_scripts.py +478 -0
- code_puppy/tools/browser/browser_workflows.py +196 -0
- code_puppy/tools/browser/camoufox_manager.py +194 -0
- code_puppy/tools/browser/vqa_agent.py +66 -0
- code_puppy/tools/browser_control.py +293 -0
- code_puppy/tools/browser_interactions.py +552 -0
- code_puppy/tools/browser_locators.py +642 -0
- code_puppy/tools/browser_navigation.py +251 -0
- code_puppy/tools/browser_screenshot.py +278 -0
- code_puppy/tools/browser_scripts.py +478 -0
- code_puppy/tools/browser_workflows.py +215 -0
- code_puppy/tools/camoufox_manager.py +150 -0
- code_puppy/tools/command_runner.py +13 -8
- code_puppy/tools/file_operations.py +7 -7
- code_puppy/tui/app.py +1 -1
- code_puppy/tui/components/custom_widgets.py +1 -1
- code_puppy/tui/screens/mcp_install_wizard.py +8 -8
- code_puppy/tui_state.py +55 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/METADATA +3 -1
- code_puppy-0.0.173.dist-info/RECORD +132 -0
- code_puppy-0.0.171.dist-info/RECORD +0 -112
- /code_puppy/{mcp → mcp_}/__init__.py +0 -0
- /code_puppy/{mcp → mcp_}/async_lifecycle.py +0 -0
- /code_puppy/{mcp → mcp_}/blocking_startup.py +0 -0
- /code_puppy/{mcp → mcp_}/captured_stdio_server.py +0 -0
- /code_puppy/{mcp → mcp_}/circuit_breaker.py +0 -0
- /code_puppy/{mcp → mcp_}/dashboard.py +0 -0
- /code_puppy/{mcp → mcp_}/error_isolation.py +0 -0
- /code_puppy/{mcp → mcp_}/health_monitor.py +0 -0
- /code_puppy/{mcp → mcp_}/manager.py +0 -0
- /code_puppy/{mcp → mcp_}/registry.py +0 -0
- /code_puppy/{mcp → mcp_}/retry_manager.py +0 -0
- /code_puppy/{mcp → mcp_}/status_tracker.py +0 -0
- /code_puppy/{mcp → mcp_}/system_tools.py +0 -0
- {code_puppy-0.0.171.data → code_puppy-0.0.173.data}/data/code_puppy/models.json +0 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Browser navigation and control tools."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from pydantic_ai import RunContext
|
|
6
|
+
|
|
7
|
+
from code_puppy.messaging import emit_info
|
|
8
|
+
from code_puppy.tools.common import generate_group_id
|
|
9
|
+
|
|
10
|
+
from .camoufox_manager import get_camoufox_manager
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
async def navigate_to_url(url: str) -> Dict[str, Any]:
    """Load ``url`` in the current browser page and report where it landed.

    Args:
        url: Address handed to the page's ``goto`` call.

    Returns:
        On success: ``success=True`` plus the page's ``url`` after the
        navigation, its ``title`` and the original ``requested_url``.
        When no page is available: ``success=False`` and an ``error`` text.
        When an exception is raised: ``success=False``, the ``error`` text
        and the requested ``url``.
    """
    group_id = generate_group_id("browser_navigate", url)
    banner = f"[bold white on blue] BROWSER NAVIGATE [/bold white on blue] 🌐 {url}"
    emit_info(banner, message_group=group_id)

    try:
        page = await get_camoufox_manager().get_current_page()
        if not page:
            return {"success": False, "error": "No active browser page available"}

        await page.goto(url, wait_until="domcontentloaded", timeout=30000)

        # Read the address back from the page: a redirect may have changed it.
        landed_url = page.url
        page_title = await page.title()

        emit_info(f"[green]Navigated to: {landed_url}[/green]", message_group=group_id)
        return {
            "success": True,
            "url": landed_url,
            "title": page_title,
            "requested_url": url,
        }
    except Exception as exc:
        # Failures are reported to the caller as data, never re-raised.
        emit_info(f"[red]Navigation failed: {str(exc)}[/red]", message_group=group_id)
        return {"success": False, "error": str(exc), "url": url}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def get_page_info() -> Dict[str, Any]:
    """Report the URL and title of the current browser page.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` when a page is
        available, otherwise ``{"success": False, "error": ...}`` (also
        used when any exception is raised while querying the page).
    """
    group_id = generate_group_id("browser_get_page_info")
    emit_info(
        "[bold white on blue] BROWSER GET PAGE INFO [/bold white on blue] 📌",
        message_group=group_id,
    )

    try:
        manager = get_camoufox_manager()
        page = await manager.get_current_page()
        if page:
            current_url = page.url
            page_title = await page.title()
            return {"success": True, "url": current_url, "title": page_title}
        return {"success": False, "error": "No active browser page available"}
    except Exception as exc:
        return {"success": False, "error": str(exc)}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def go_back() -> Dict[str, Any]:
    """Step one entry back in the current page's history.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` describing the page
        after the call, or ``{"success": False, "error": ...}`` when no page
        is available or an exception is raised.
    """
    group_id = generate_group_id("browser_go_back")
    emit_info(
        "[bold white on blue] BROWSER GO BACK [/bold white on blue] ⬅️",
        message_group=group_id,
    )

    try:
        manager = get_camoufox_manager()
        page = await manager.get_current_page()
        if page:
            await page.go_back(wait_until="domcontentloaded")
            current_url = page.url
            page_title = await page.title()
            return {"success": True, "url": current_url, "title": page_title}
        return {"success": False, "error": "No active browser page available"}
    except Exception as exc:
        return {"success": False, "error": str(exc)}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
async def go_forward() -> Dict[str, Any]:
    """Step one entry forward in the current page's history.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` describing the page
        after the call, or ``{"success": False, "error": ...}`` when no page
        is available or an exception is raised.
    """
    group_id = generate_group_id("browser_go_forward")
    emit_info(
        "[bold white on blue] BROWSER GO FORWARD [/bold white on blue] ➡️",
        message_group=group_id,
    )

    try:
        manager = get_camoufox_manager()
        page = await manager.get_current_page()
        if page:
            await page.go_forward(wait_until="domcontentloaded")
            current_url = page.url
            page_title = await page.title()
            return {"success": True, "url": current_url, "title": page_title}
        return {"success": False, "error": "No active browser page available"}
    except Exception as exc:
        return {"success": False, "error": str(exc)}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
async def reload_page(wait_until: str = "domcontentloaded") -> Dict[str, Any]:
    """Reload the current browser page.

    Args:
        wait_until: Load state passed straight through to ``page.reload``.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` describing the page
        after the reload, or ``{"success": False, "error": ...}`` when no
        page is available or an exception is raised.
    """
    group_id = generate_group_id("browser_reload", wait_until)
    banner = f"[bold white on blue] BROWSER RELOAD [/bold white on blue] 🔄 wait_until={wait_until}"
    emit_info(banner, message_group=group_id)

    try:
        manager = get_camoufox_manager()
        page = await manager.get_current_page()
        if page:
            await page.reload(wait_until=wait_until)
            current_url = page.url
            page_title = await page.title()
            return {"success": True, "url": current_url, "title": page_title}
        return {"success": False, "error": "No active browser page available"}
    except Exception as exc:
        return {"success": False, "error": str(exc)}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
async def wait_for_load_state(
    state: str = "domcontentloaded", timeout: int = 30000
) -> Dict[str, Any]:
    """Block until the current page reaches the requested load state.

    Args:
        state: Load state passed to ``page.wait_for_load_state``.
        timeout: Upper bound for the wait; the banner reports it in ms.

    Returns:
        ``{"success": True, "state": ..., "url": ...}`` once the state is
        reached. ``{"success": False, "error": ...}`` when no page is
        available; if an exception is raised the dict also carries
        ``state``.
    """
    group_id = generate_group_id("browser_wait_for_load", f"{state}_{timeout}")
    banner = f"[bold white on blue] BROWSER WAIT FOR LOAD [/bold white on blue] ⏱️ state={state} timeout={timeout}ms"
    emit_info(banner, message_group=group_id)

    try:
        manager = get_camoufox_manager()
        page = await manager.get_current_page()
        if page:
            await page.wait_for_load_state(state, timeout=timeout)
            return {"success": True, "state": state, "url": page.url}
        return {"success": False, "error": "No active browser page available"}
    except Exception as exc:
        return {"success": False, "error": str(exc), "state": state}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def register_navigate_to_url(agent):
    """Attach the ``browser_navigate`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_navigate(context: RunContext, url: str) -> Dict[str, Any]:
        """
        Navigate the browser to a specific URL.

        Args:
            url: The URL to navigate to (must include protocol like https://)

        Returns:
            Dict with navigation results including final URL and page title
        """
        # ``context`` is required by the tool signature but is not used here.
        outcome = await navigate_to_url(url)
        return outcome
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def register_get_page_info(agent):
    """Attach the ``browser_get_page_info`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_get_page_info(context: RunContext) -> Dict[str, Any]:
        """
        Get information about the current page.

        Returns:
            Dict with current URL and page title
        """
        # ``context`` is required by the tool signature but is not used here.
        outcome = await get_page_info()
        return outcome
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def register_browser_go_back(agent):
    """Attach the ``browser_go_back`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_go_back(context: RunContext) -> Dict[str, Any]:
        """
        Navigate back in browser history.

        Returns:
            Dict with navigation results
        """
        # ``context`` is required by the tool signature but is not used here.
        outcome = await go_back()
        return outcome
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def register_browser_go_forward(agent):
    """Attach the ``browser_go_forward`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_go_forward(context: RunContext) -> Dict[str, Any]:
        """
        Navigate forward in browser history.

        Returns:
            Dict with navigation results
        """
        # ``context`` is required by the tool signature but is not used here.
        outcome = await go_forward()
        return outcome
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def register_reload_page(agent):
    """Attach the ``browser_reload`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_reload(
        context: RunContext, wait_until: str = "domcontentloaded"
    ) -> Dict[str, Any]:
        """
        Reload the current page.

        Args:
            wait_until: Load state to wait for (networkidle, domcontentloaded, load)

        Returns:
            Dict with reload results
        """
        # ``context`` is required by the tool signature but is not used here.
        outcome = await reload_page(wait_until=wait_until)
        return outcome
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def register_wait_for_load_state(agent):
    """Attach the ``browser_wait_for_load`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_wait_for_load(
        context: RunContext, state: str = "domcontentloaded", timeout: int = 30000
    ) -> Dict[str, Any]:
        """
        Wait for the page to reach a specific load state.

        Args:
            state: Load state to wait for (networkidle, domcontentloaded, load)
            timeout: Timeout in milliseconds

        Returns:
            Dict with wait results
        """
        # ``context`` is required by the tool signature but is not used here.
        outcome = await wait_for_load_state(state=state, timeout=timeout)
        return outcome
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Screenshot and visual analysis tool with VQA capabilities."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from tempfile import gettempdir, mkdtemp
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
from pydantic_ai import RunContext
|
|
11
|
+
|
|
12
|
+
from code_puppy.messaging import emit_error, emit_info
|
|
13
|
+
from code_puppy.tools.common import generate_group_id
|
|
14
|
+
|
|
15
|
+
from .camoufox_manager import get_camoufox_manager
|
|
16
|
+
from .vqa_agent import VisualAnalysisResult, run_vqa_analysis
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Scratch directory created when this module is imported; every screenshot
# path built below lives directly inside it.
_TEMP_SCREENSHOT_ROOT = Path(
    mkdtemp(dir=gettempdir(), prefix="code_puppy_screenshots_")
)


def _build_screenshot_path(timestamp: str) -> Path:
    """Map ``timestamp`` to ``screenshot_<timestamp>.png`` inside the temp root.

    The path depends only on ``timestamp``, so equal timestamps yield the
    same path.
    """
    return _TEMP_SCREENSHOT_ROOT.joinpath(f"screenshot_{timestamp}.png")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# NOTE(review): nothing in the visible part of this module instantiates this
# model - the screenshot helpers return plain dicts with matching keys.
# The class docstring is left untouched because pydantic may surface it in
# the generated schema - confirm before rewording it.
class ScreenshotResult(BaseModel):
    """Result from screenshot operation."""

    # True when the capture succeeded.
    success: bool
    # Where the image was written, if it was saved to disk.
    screenshot_path: Optional[str] = None
    # Raw image bytes of the capture.
    screenshot_data: Optional[bytes] = None
    # Capture time as a string (format not fixed by this class).
    timestamp: Optional[str] = None
    # Failure description when ``success`` is False.
    error: Optional[str] = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def _capture_screenshot(
    page,
    full_page: bool = False,
    element_selector: Optional[str] = None,
    save_screenshot: bool = True,
    group_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Capture a screenshot of the page, or of a single element on it.

    Args:
        page: Browser page object; must provide ``locator()`` and an async
            ``screenshot()``.
        full_page: Forwarded to ``page.screenshot``. Ignored when
            ``element_selector`` is given.
        element_selector: When set, only the first element matching this
            selector is captured, and only if it is visible.
        save_screenshot: When true, the image bytes are also written to the
            path produced by ``_build_screenshot_path``.
        group_id: Optional message group for the "saved" notification.

    Returns:
        On success: ``success=True``, ``screenshot_data`` (image bytes),
        ``timestamp`` and, if saved, ``screenshot_path``.
        On failure: ``success=False`` and an ``error`` message. Exceptions
        are never propagated to the caller.
    """
    try:
        # NOTE: one-second resolution - two saved captures within the same
        # second resolve to the same file name and the later overwrites
        # the earlier.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if element_selector:
            # ``Locator.first`` is a plain (synchronous) property that
            # returns another Locator. It must NOT be awaited: awaiting it
            # raises TypeError, which previously made every element
            # screenshot fail through the broad ``except`` below.
            element = page.locator(element_selector).first
            if not await element.is_visible():
                return {
                    "success": False,
                    "error": f"Element '{element_selector}' is not visible",
                }
            screenshot_data = await element.screenshot()
        else:
            # Viewport capture, or the whole scrollable page if requested.
            screenshot_data = await page.screenshot(full_page=full_page)

        result = {
            "success": True,
            "screenshot_data": screenshot_data,
            "timestamp": timestamp,
        }

        if save_screenshot:
            screenshot_path = _build_screenshot_path(timestamp)
            screenshot_path.parent.mkdir(parents=True, exist_ok=True)
            screenshot_path.write_bytes(screenshot_data)

            result["screenshot_path"] = str(screenshot_path)
            message = f"[green]Screenshot saved: {screenshot_path}[/green]"
            if group_id:
                emit_info(message, message_group=group_id)
            else:
                emit_info(message)

        return result

    except Exception as e:
        # Report the failure as data so callers can relay it to the user.
        return {"success": False, "error": str(e)}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
async def take_screenshot_and_analyze(
    question: str,
    full_page: bool = False,
    element_selector: Optional[str] = None,
    save_screenshot: bool = True,
) -> Dict[str, Any]:
    """
    Take a screenshot and analyze it using visual understanding.

    The screenshot is captured from the current browser page and its bytes
    are passed, together with ``question``, to ``run_vqa_analysis`` in a
    worker thread.

    Args:
        question: The specific question to ask about the screenshot
        full_page: Whether to capture the full page or just viewport
        element_selector: Optional selector to screenshot just a specific element
        save_screenshot: Whether to save the screenshot to disk

    Returns:
        Dict containing analysis results and screenshot info. On success it
        holds ``success=True``, ``question``, ``answer``, ``confidence``,
        ``observations`` and ``screenshot_info``. On failure it holds
        ``success=False``, ``error`` and ``question`` (plus
        ``screenshot_info`` when only the analysis step failed). Failures
        are returned, never raised.
    """
    target = element_selector or ("full_page" if full_page else "viewport")
    group_id = generate_group_id(
        "browser_screenshot_analyze", f"{question[:50]}_{target}"
    )
    emit_info(
        f"[bold white on blue] BROWSER SCREENSHOT ANALYZE [/bold white on blue] 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
        message_group=group_id,
    )
    try:
        # Get the current browser page
        browser_manager = get_camoufox_manager()
        page = await browser_manager.get_current_page()

        if not page:
            return {
                "success": False,
                "error": "No active browser page available. Please navigate to a webpage first.",
                "question": question,
            }

        # Take screenshot
        screenshot_result = await _capture_screenshot(
            page,
            full_page=full_page,
            element_selector=element_selector,
            save_screenshot=save_screenshot,
            group_id=group_id,
        )

        if not screenshot_result["success"]:
            error_message = screenshot_result.get("error", "Screenshot failed")
            emit_error(
                f"[red]Screenshot capture failed: {error_message}[/red]",
                message_group=group_id,
            )
            return {
                "success": False,
                "error": error_message,
                "question": question,
            }

        screenshot_bytes = screenshot_result.get("screenshot_data")
        if not screenshot_bytes:
            emit_error(
                "[red]Screenshot captured but pixel data missing; cannot run visual analysis.[/red]",
                message_group=group_id,
            )
            return {
                "success": False,
                "error": "Screenshot captured but no image bytes available for analysis.",
                "question": question,
            }

        try:
            # run_vqa_analysis is called synchronously, so run it in a
            # worker thread to keep the event loop responsive.
            vqa_result = await asyncio.to_thread(
                run_vqa_analysis,
                question,
                screenshot_bytes,
            )
        except Exception as exc:
            emit_error(
                f"[red]Visual question answering failed: {exc}[/red]",
                message_group=group_id,
            )
            # The capture itself worked, so still tell the caller where the
            # screenshot is.
            return {
                "success": False,
                "error": f"Visual analysis failed: {exc}",
                "question": question,
                "screenshot_info": {
                    "path": screenshot_result.get("screenshot_path"),
                    "timestamp": screenshot_result.get("timestamp"),
                    "full_page": full_page,
                    "element_selector": element_selector,
                },
            }

        emit_info(
            f"[green]Visual analysis answer: {vqa_result.answer}[/green]",
            message_group=group_id,
        )
        emit_info(
            f"[dim]Observations: {vqa_result.observations}[/dim]",
            message_group=group_id,
        )

        return {
            "success": True,
            "question": question,
            "answer": vqa_result.answer,
            "confidence": vqa_result.confidence,
            "observations": vqa_result.observations,
            "screenshot_info": {
                "path": screenshot_result.get("screenshot_path"),
                "size": len(screenshot_bytes),
                "timestamp": screenshot_result.get("timestamp"),
                "full_page": full_page,
                "element_selector": element_selector,
            },
        }

    except Exception as e:
        # Use the error channel here too, matching every other failure
        # path in this function (this one previously went out as info).
        emit_error(
            f"[red]Screenshot analysis failed: {str(e)}[/red]", message_group=group_id
        )
        return {"success": False, "error": str(e), "question": question}
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def register_take_screenshot_and_analyze(agent):
    """Attach the ``browser_screenshot_analyze`` tool to ``agent`` through ``agent.tool``."""

    # NOTE(review): the inner docstring is kept verbatim because the agent
    # framework presumably exposes it as the tool description - confirm
    # before rewording it.
    @agent.tool
    async def browser_screenshot_analyze(
        context: RunContext,
        question: str,
        full_page: bool = False,
        element_selector: Optional[str] = None,
        save_screenshot: bool = True,
    ) -> Dict[str, Any]:
        """
        Take a screenshot and analyze it to answer a specific question.

        Args:
            question: The specific question to ask about the screenshot
            full_page: Whether to capture the full page or just viewport
            element_selector: Optional CSS/XPath selector to screenshot specific element
            save_screenshot: Whether to save the screenshot to disk

        Returns:
            Dict with analysis results including answer, confidence, and observations
        """
        # ``context`` is required by the tool signature but is not used here.
        forwarded = {
            "question": question,
            "full_page": full_page,
            "element_selector": element_selector,
            "save_screenshot": save_screenshot,
        }
        outcome = await take_screenshot_and_analyze(**forwarded)
        return outcome
|