code-puppy 0.0.356__py3-none-any.whl → 0.0.358__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30):
  1. code_puppy/agents/agent_qa_kitten.py +10 -5
  2. code_puppy/agents/agent_terminal_qa.py +323 -0
  3. code_puppy/api/app.py +79 -2
  4. code_puppy/api/routers/commands.py +21 -2
  5. code_puppy/api/routers/sessions.py +49 -8
  6. code_puppy/config.py +5 -2
  7. code_puppy/tools/__init__.py +37 -0
  8. code_puppy/tools/agent_tools.py +26 -1
  9. code_puppy/tools/browser/__init__.py +41 -0
  10. code_puppy/tools/browser/browser_control.py +6 -6
  11. code_puppy/tools/browser/browser_interactions.py +21 -20
  12. code_puppy/tools/browser/browser_locators.py +9 -9
  13. code_puppy/tools/browser/browser_navigation.py +7 -7
  14. code_puppy/tools/browser/browser_screenshot.py +60 -135
  15. code_puppy/tools/browser/browser_screenshot_vqa.py +195 -0
  16. code_puppy/tools/browser/browser_scripts.py +15 -13
  17. code_puppy/tools/browser/camoufox_manager.py +226 -64
  18. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  19. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  20. code_puppy/tools/browser/terminal_screenshot_tools.py +520 -0
  21. code_puppy/tools/browser/terminal_tools.py +525 -0
  22. code_puppy/tools/browser/vqa_agent.py +138 -34
  23. code_puppy/tools/command_runner.py +0 -1
  24. {code_puppy-0.0.356.dist-info → code_puppy-0.0.358.dist-info}/METADATA +1 -1
  25. {code_puppy-0.0.356.dist-info → code_puppy-0.0.358.dist-info}/RECORD +30 -24
  26. {code_puppy-0.0.356.data → code_puppy-0.0.358.data}/data/code_puppy/models.json +0 -0
  27. {code_puppy-0.0.356.data → code_puppy-0.0.358.data}/data/code_puppy/models_dev_api.json +0 -0
  28. {code_puppy-0.0.356.dist-info → code_puppy-0.0.358.dist-info}/WHEEL +0 -0
  29. {code_puppy-0.0.356.dist-info → code_puppy-0.0.358.dist-info}/entry_points.txt +0 -0
  30. {code_puppy-0.0.356.dist-info → code_puppy-0.0.358.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,21 @@
1
- """Screenshot and visual analysis tool with VQA capabilities."""
1
+ """Screenshot tool for browser automation.
2
2
 
3
- import asyncio
3
+ Captures screenshots and returns them as base64 data that multimodal
4
+ models can directly see and analyze - no separate VQA agent needed.
5
+ """
6
+
7
+ import base64
4
8
  from datetime import datetime
5
9
  from pathlib import Path
6
10
  from tempfile import gettempdir, mkdtemp
7
11
  from typing import Any, Dict, Optional
8
12
 
9
- from pydantic import BaseModel
10
13
  from pydantic_ai import RunContext
11
14
 
12
15
  from code_puppy.messaging import emit_error, emit_info, emit_success
13
16
  from code_puppy.tools.common import generate_group_id
14
17
 
15
- from .camoufox_manager import get_camoufox_manager
16
- from .vqa_agent import run_vqa_analysis
18
+ from .camoufox_manager import get_session_browser_manager
17
19
 
18
20
  _TEMP_SCREENSHOT_ROOT = Path(
19
21
  mkdtemp(prefix="code_puppy_screenshots_", dir=gettempdir())
@@ -21,21 +23,11 @@ _TEMP_SCREENSHOT_ROOT = Path(
21
23
 
22
24
 
23
25
  def _build_screenshot_path(timestamp: str) -> Path:
24
- """Return the target path for a screenshot using a shared temp directory."""
26
+ """Return the target path for a screenshot."""
25
27
  filename = f"screenshot_{timestamp}.png"
26
28
  return _TEMP_SCREENSHOT_ROOT / filename
27
29
 
28
30
 
29
- class ScreenshotResult(BaseModel):
30
- """Result from screenshot operation."""
31
-
32
- success: bool
33
- screenshot_path: Optional[str] = None
34
- screenshot_data: Optional[bytes] = None
35
- timestamp: Optional[str] = None
36
- error: Optional[str] = None
37
-
38
-
39
31
  async def _capture_screenshot(
40
32
  page,
41
33
  full_page: bool = False,
@@ -45,41 +37,38 @@ async def _capture_screenshot(
45
37
  ) -> Dict[str, Any]:
46
38
  """Internal screenshot capture function."""
47
39
  try:
48
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
40
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
49
41
 
50
42
  # Take screenshot
51
43
  if element_selector:
52
- # Screenshot specific element
53
44
  element = await page.locator(element_selector).first
54
45
  if not await element.is_visible():
55
46
  return {
56
47
  "success": False,
57
48
  "error": f"Element '{element_selector}' is not visible",
58
49
  }
59
- screenshot_data = await element.screenshot()
50
+ screenshot_bytes = await element.screenshot()
60
51
  else:
61
- # Screenshot page or full page
62
- screenshot_data = await page.screenshot(full_page=full_page)
52
+ screenshot_bytes = await page.screenshot(full_page=full_page)
63
53
 
64
- result = {
54
+ result: Dict[str, Any] = {
65
55
  "success": True,
66
- "screenshot_data": screenshot_data,
56
+ "screenshot_bytes": screenshot_bytes,
57
+ "base64_data": base64.b64encode(screenshot_bytes).decode("utf-8"),
67
58
  "timestamp": timestamp,
68
59
  }
69
60
 
70
61
  if save_screenshot:
71
62
  screenshot_path = _build_screenshot_path(timestamp)
72
63
  screenshot_path.parent.mkdir(parents=True, exist_ok=True)
73
-
74
64
  with open(screenshot_path, "wb") as f:
75
- f.write(screenshot_data)
76
-
65
+ f.write(screenshot_bytes)
77
66
  result["screenshot_path"] = str(screenshot_path)
78
- message = f"Screenshot saved: {screenshot_path}"
67
+
79
68
  if group_id:
80
- emit_success(message, message_group=group_id)
81
- else:
82
- emit_success(message)
69
+ emit_success(
70
+ f"Screenshot saved: {screenshot_path}", message_group=group_id
71
+ )
83
72
 
84
73
  return result
85
74
 
@@ -87,46 +76,43 @@ async def _capture_screenshot(
87
76
  return {"success": False, "error": str(e)}
88
77
 
89
78
 
90
- async def take_screenshot_and_analyze(
91
- question: str,
79
+ async def take_screenshot(
92
80
  full_page: bool = False,
93
81
  element_selector: Optional[str] = None,
94
82
  save_screenshot: bool = True,
95
83
  ) -> Dict[str, Any]:
96
- """
97
- Take a screenshot and analyze it using visual understanding.
84
+ """Take a screenshot of the browser page.
85
+
86
+ Returns the screenshot as base64-encoded PNG data that multimodal
87
+ models can directly see and analyze.
98
88
 
99
89
  Args:
100
- question: The specific question to ask about the screenshot
101
- full_page: Whether to capture the full page or just viewport
102
- element_selector: Optional selector to screenshot just a specific element
103
- save_screenshot: Whether to save the screenshot to disk
90
+ full_page: Whether to capture full page or just viewport.
91
+ element_selector: Optional selector to screenshot specific element.
92
+ save_screenshot: Whether to save the screenshot to disk.
104
93
 
105
94
  Returns:
106
- Dict containing analysis results and screenshot info
95
+ Dict containing:
96
+ - success (bool): True if screenshot was captured.
97
+ - base64_image (str): Base64-encoded PNG image data.
98
+ - media_type (str): Always "image/png".
99
+ - screenshot_path (str): Path to saved file (if saved).
100
+ - error (str): Error message if unsuccessful.
107
101
  """
108
102
  target = element_selector or ("full_page" if full_page else "viewport")
109
- group_id = generate_group_id(
110
- "browser_screenshot_analyze", f"{question[:50]}_{target}"
111
- )
112
- emit_info(
113
- f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
114
- message_group=group_id,
115
- )
103
+ group_id = generate_group_id("browser_screenshot", target)
104
+ emit_info(f"BROWSER SCREENSHOT 📷 target={target}", message_group=group_id)
105
+
116
106
  try:
117
- # Get the current browser page
118
- browser_manager = get_camoufox_manager()
107
+ browser_manager = get_session_browser_manager()
119
108
  page = await browser_manager.get_current_page()
120
109
 
121
110
  if not page:
122
- return {
123
- "success": False,
124
- "error": "No active browser page available. Please navigate to a webpage first.",
125
- "question": question,
126
- }
111
+ error_msg = "No active browser page. Navigate to a webpage first."
112
+ emit_error(error_msg, message_group=group_id)
113
+ return {"success": False, "error": error_msg}
127
114
 
128
- # Take screenshot
129
- screenshot_result = await _capture_screenshot(
115
+ result = await _capture_screenshot(
130
116
  page,
131
117
  full_page=full_page,
132
118
  element_selector=element_selector,
@@ -134,108 +120,47 @@ async def take_screenshot_and_analyze(
134
120
  group_id=group_id,
135
121
  )
136
122
 
137
- if not screenshot_result["success"]:
138
- error_message = screenshot_result.get("error", "Screenshot failed")
139
- emit_error(
140
- f"Screenshot capture failed: {error_message}",
141
- message_group=group_id,
142
- )
143
- return {
144
- "success": False,
145
- "error": error_message,
146
- "question": question,
147
- }
148
-
149
- screenshot_bytes = screenshot_result.get("screenshot_data")
150
- if not screenshot_bytes:
151
- emit_error(
152
- "Screenshot captured but pixel data missing; cannot run visual analysis.",
153
- message_group=group_id,
154
- )
155
- return {
156
- "success": False,
157
- "error": "Screenshot captured but no image bytes available for analysis.",
158
- "question": question,
159
- }
160
-
161
- try:
162
- vqa_result = await asyncio.to_thread(
163
- run_vqa_analysis,
164
- question,
165
- screenshot_bytes,
166
- )
167
- except Exception as exc:
168
- emit_error(
169
- f"Visual question answering failed: {exc}",
170
- message_group=group_id,
171
- )
172
- return {
173
- "success": False,
174
- "error": f"Visual analysis failed: {exc}",
175
- "question": question,
176
- "screenshot_info": {
177
- "path": screenshot_result.get("screenshot_path"),
178
- "timestamp": screenshot_result.get("timestamp"),
179
- "full_page": full_page,
180
- "element_selector": element_selector,
181
- },
182
- }
183
-
184
- emit_success(
185
- f"Visual analysis answer: {vqa_result.answer}",
186
- message_group=group_id,
187
- )
188
- emit_info(
189
- f"Observations: {vqa_result.observations}",
190
- message_group=group_id,
191
- )
123
+ if not result["success"]:
124
+ emit_error(result.get("error", "Screenshot failed"), message_group=group_id)
125
+ return result
192
126
 
193
127
  return {
194
128
  "success": True,
195
- "question": question,
196
- "answer": vqa_result.answer,
197
- "confidence": vqa_result.confidence,
198
- "observations": vqa_result.observations,
199
- "screenshot_info": {
200
- "path": screenshot_result.get("screenshot_path"),
201
- "size": len(screenshot_bytes),
202
- "timestamp": screenshot_result.get("timestamp"),
203
- "full_page": full_page,
204
- "element_selector": element_selector,
205
- },
129
+ "base64_image": result["base64_data"],
130
+ "media_type": "image/png",
131
+ "screenshot_path": result.get("screenshot_path"),
132
+ "message": "Screenshot captured. The base64_image contains the browser view.",
206
133
  }
207
134
 
208
135
  except Exception as e:
209
- emit_error(f"Screenshot analysis failed: {str(e)}", message_group=group_id)
210
- return {"success": False, "error": str(e), "question": question}
136
+ error_msg = f"Screenshot failed: {str(e)}"
137
+ emit_error(error_msg, message_group=group_id)
138
+ return {"success": False, "error": error_msg}
211
139
 
212
140
 
213
141
  def register_take_screenshot_and_analyze(agent):
214
- """Register the screenshot analysis tool."""
142
+ """Register the screenshot tool."""
215
143
 
216
144
  @agent.tool
217
145
  async def browser_screenshot_analyze(
218
146
  context: RunContext,
219
- question: str,
220
147
  full_page: bool = False,
221
148
  element_selector: Optional[str] = None,
222
- save_screenshot: bool = True,
223
149
  ) -> Dict[str, Any]:
224
150
  """
225
- Take a screenshot and analyze it to answer a specific question.
151
+ Take a screenshot of the browser page.
152
+
153
+ Returns the screenshot as base64 image data that you can see directly.
154
+ Use this to see what's displayed in the browser.
226
155
 
227
156
  Args:
228
- question: The specific question to ask about the screenshot
229
- full_page: Whether to capture the full page or just viewport
230
- element_selector: Optional CSS/XPath selector to screenshot specific element
231
- save_screenshot: Whether to save the screenshot to disk
157
+ full_page: Capture full page (True) or just viewport (False).
158
+ element_selector: Optional CSS selector to screenshot specific element.
232
159
 
233
160
  Returns:
234
- Dict with analysis results including answer, confidence, and observations
161
+ Dict with base64_image (PNG data you can see), screenshot_path, etc.
235
162
  """
236
- return await take_screenshot_and_analyze(
237
- question=question,
163
+ return await take_screenshot(
238
164
  full_page=full_page,
239
165
  element_selector=element_selector,
240
- save_screenshot=save_screenshot,
241
166
  )
@@ -0,0 +1,195 @@
1
+ """VQA-based Screenshot tool for browser automation (qa-kitten).
2
+
3
+ This module provides screenshot analysis using a dedicated VQA agent.
4
+ Unlike browser_screenshot.py which returns raw base64 bytes for multimodal
5
+ models to see directly, this version offloads the visual analysis to a
6
+ separate VQA agent, helping manage context in the calling agent.
7
+
8
+ Use this for qa-kitten where context management is important.
9
+ Use browser_screenshot.py for terminal-qa where direct image viewing is needed.
10
+ """
11
+
12
+ from typing import Any, Dict, Optional
13
+
14
+ from pydantic_ai import RunContext
15
+ from rich.console import Console
16
+
17
+ from code_puppy.messaging import emit_error, emit_info, emit_success
18
+ from code_puppy.tools.common import generate_group_id
19
+
20
+ from .browser_screenshot import _capture_screenshot
21
+ from .camoufox_manager import get_session_browser_manager
22
+ from .vqa_agent import run_vqa_analysis_stream
23
+
24
+
25
+ async def take_screenshot_and_analyze(
26
+ question: str,
27
+ full_page: bool = False,
28
+ element_selector: Optional[str] = None,
29
+ save_screenshot: bool = True,
30
+ ) -> Dict[str, Any]:
31
+ """Take a screenshot and analyze it using the VQA agent.
32
+
33
+ This function captures a screenshot and passes it to a dedicated
34
+ VQA (Visual Question Answering) agent for analysis. The VQA agent
35
+ runs separately, keeping the image analysis out of the calling
36
+ agent's context window.
37
+
38
+ Args:
39
+ question: The question to ask about the screenshot.
40
+ Examples:
41
+ - "What buttons are visible on this page?"
42
+ - "Is there an error message displayed?"
43
+ - "What is the main heading text?"
44
+ - "Describe the layout of this form."
45
+ full_page: Whether to capture full page or just viewport.
46
+ Defaults to False (viewport only).
47
+ element_selector: Optional CSS selector to screenshot a specific
48
+ element instead of the whole page.
49
+ save_screenshot: Whether to save the screenshot to disk.
50
+
51
+ Returns:
52
+ Dict containing:
53
+ - success (bool): True if analysis succeeded.
54
+ - answer (str): The VQA agent's streamed answer to your question.
55
+ - screenshot_info (dict): Path, timestamp, and other metadata.
56
+ - error (str): Error message if unsuccessful.
57
+ """
58
+ target = element_selector or ("full_page" if full_page else "viewport")
59
+ group_id = generate_group_id(
60
+ "browser_screenshot_analyze", f"{question[:50]}_{target}"
61
+ )
62
+ emit_info(
63
+ f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
64
+ message_group=group_id,
65
+ )
66
+
67
+ try:
68
+ # Get the browser page
69
+ browser_manager = get_session_browser_manager()
70
+ page = await browser_manager.get_current_page()
71
+
72
+ if not page:
73
+ error_msg = "No active browser page. Navigate to a webpage first."
74
+ emit_error(error_msg, message_group=group_id)
75
+ return {"success": False, "error": error_msg, "question": question}
76
+
77
+ # Capture the screenshot
78
+ screenshot_result = await _capture_screenshot(
79
+ page,
80
+ full_page=full_page,
81
+ element_selector=element_selector,
82
+ save_screenshot=save_screenshot,
83
+ group_id=group_id,
84
+ )
85
+
86
+ if not screenshot_result["success"]:
87
+ error_msg = screenshot_result.get("error", "Screenshot failed")
88
+ emit_error(
89
+ f"Screenshot capture failed: {error_msg}", message_group=group_id
90
+ )
91
+ return {"success": False, "error": error_msg, "question": question}
92
+
93
+ screenshot_bytes = screenshot_result.get("screenshot_bytes")
94
+ if not screenshot_bytes:
95
+ emit_error(
96
+ "Screenshot captured but pixel data missing; cannot run visual analysis.",
97
+ message_group=group_id,
98
+ )
99
+ return {
100
+ "success": False,
101
+ "error": "Screenshot captured but no image bytes available for analysis.",
102
+ "question": question,
103
+ }
104
+
105
+ # Run VQA analysis with streaming output
106
+ try:
107
+ console = Console()
108
+ console.print() # Newline before streaming starts
109
+ console.print("[bold cyan]🔍 VQA Analysis:[/bold cyan]")
110
+
111
+ vqa_answer = await run_vqa_analysis_stream(
112
+ question,
113
+ screenshot_bytes,
114
+ )
115
+ except Exception as exc:
116
+ emit_error(
117
+ f"Visual question answering failed: {exc}",
118
+ message_group=group_id,
119
+ )
120
+ return {
121
+ "success": False,
122
+ "error": f"Visual analysis failed: {exc}",
123
+ "question": question,
124
+ "screenshot_info": {
125
+ "path": screenshot_result.get("screenshot_path"),
126
+ "timestamp": screenshot_result.get("timestamp"),
127
+ "full_page": full_page,
128
+ "element_selector": element_selector,
129
+ },
130
+ }
131
+
132
+ emit_success(
133
+ "Visual analysis complete",
134
+ message_group=group_id,
135
+ )
136
+
137
+ return {
138
+ "success": True,
139
+ "question": question,
140
+ "answer": vqa_answer,
141
+ "screenshot_info": {
142
+ "path": screenshot_result.get("screenshot_path"),
143
+ "size": len(screenshot_bytes),
144
+ "timestamp": screenshot_result.get("timestamp"),
145
+ "full_page": full_page,
146
+ "element_selector": element_selector,
147
+ },
148
+ }
149
+
150
+ except Exception as e:
151
+ error_msg = f"Screenshot analysis failed: {str(e)}"
152
+ emit_error(error_msg, message_group=group_id)
153
+ return {"success": False, "error": error_msg, "question": question}
154
+
155
+
156
+ def register_take_screenshot_and_analyze_vqa(agent):
157
+ """Register the VQA-based screenshot tool.
158
+
159
+ This tool takes a screenshot and analyzes it using a separate VQA agent.
160
+ Use this for agents where context management is important (like qa-kitten).
161
+ """
162
+
163
+ @agent.tool
164
+ async def browser_screenshot_vqa(
165
+ context: RunContext,
166
+ question: str,
167
+ full_page: bool = False,
168
+ element_selector: Optional[str] = None,
169
+ ) -> Dict[str, Any]:
170
+ """
171
+ Take a screenshot and analyze it with VQA.
172
+
173
+ Captures a screenshot of the browser and uses a visual AI to
174
+ answer your question about what's visible on the page.
175
+
176
+ Args:
177
+ question: What you want to know about the screenshot.
178
+ Examples:
179
+ - "What buttons are visible?"
180
+ - "Is there an error message?"
181
+ - "What is the page title?"
182
+ - "Is the form filled out correctly?"
183
+ full_page: Capture full page (True) or just viewport (False).
184
+ element_selector: Optional CSS selector to screenshot specific element.
185
+
186
+ Returns:
187
+ Dict with:
188
+ - answer: The streamed answer to your question
189
+ - screenshot_info: Where the screenshot was saved, etc.
190
+ """
191
+ return await take_screenshot_and_analyze(
192
+ question=question,
193
+ full_page=full_page,
194
+ element_selector=element_selector,
195
+ )
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_error, emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .camoufox_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def execute_javascript(
@@ -21,14 +21,16 @@ async def execute_javascript(
21
21
  message_group=group_id,
22
22
  )
23
23
  try:
24
- browser_manager = get_camoufox_manager()
24
+ browser_manager = get_session_browser_manager()
25
25
  page = await browser_manager.get_current_page()
26
26
 
27
27
  if not page:
28
28
  return {"success": False, "error": "No active browser page available"}
29
29
 
30
30
  # Execute JavaScript
31
- result = await page.evaluate(script, timeout=timeout)
31
+ # Note: page.evaluate() does NOT accept a timeout parameter
32
+ # The timeout arg to this function is kept for API compatibility but unused
33
+ result = await page.evaluate(script)
32
34
 
33
35
  emit_success("JavaScript executed successfully", message_group=group_id)
34
36
 
@@ -52,7 +54,7 @@ async def scroll_page(
52
54
  message_group=group_id,
53
55
  )
54
56
  try:
55
- browser_manager = get_camoufox_manager()
57
+ browser_manager = get_session_browser_manager()
56
58
  page = await browser_manager.get_current_page()
57
59
 
58
60
  if not page:
@@ -60,7 +62,7 @@ async def scroll_page(
60
62
 
61
63
  if element_selector:
62
64
  # Scroll specific element
63
- element = page.locator(element_selector)
65
+ element = page.locator(element_selector).first
64
66
  await element.scroll_into_view_if_needed()
65
67
 
66
68
  # Get element's current scroll position and dimensions
@@ -146,13 +148,13 @@ async def scroll_to_element(
146
148
  message_group=group_id,
147
149
  )
148
150
  try:
149
- browser_manager = get_camoufox_manager()
151
+ browser_manager = get_session_browser_manager()
150
152
  page = await browser_manager.get_current_page()
151
153
 
152
154
  if not page:
153
155
  return {"success": False, "error": "No active browser page available"}
154
156
 
155
- element = page.locator(selector)
157
+ element = page.locator(selector).first
156
158
  await element.wait_for(state="attached", timeout=timeout)
157
159
  await element.scroll_into_view_if_needed()
158
160
 
@@ -178,7 +180,7 @@ async def set_viewport_size(
178
180
  message_group=group_id,
179
181
  )
180
182
  try:
181
- browser_manager = get_camoufox_manager()
183
+ browser_manager = get_session_browser_manager()
182
184
  page = await browser_manager.get_current_page()
183
185
 
184
186
  if not page:
@@ -209,13 +211,13 @@ async def wait_for_element(
209
211
  message_group=group_id,
210
212
  )
211
213
  try:
212
- browser_manager = get_camoufox_manager()
214
+ browser_manager = get_session_browser_manager()
213
215
  page = await browser_manager.get_current_page()
214
216
 
215
217
  if not page:
216
218
  return {"success": False, "error": "No active browser page available"}
217
219
 
218
- element = page.locator(selector)
220
+ element = page.locator(selector).first
219
221
  await element.wait_for(state=state, timeout=timeout)
220
222
 
221
223
  emit_success(f"Element {selector} is now {state}", message_group=group_id)
@@ -240,13 +242,13 @@ async def highlight_element(
240
242
  message_group=group_id,
241
243
  )
242
244
  try:
243
- browser_manager = get_camoufox_manager()
245
+ browser_manager = get_session_browser_manager()
244
246
  page = await browser_manager.get_current_page()
245
247
 
246
248
  if not page:
247
249
  return {"success": False, "error": "No active browser page available"}
248
250
 
249
- element = page.locator(selector)
251
+ element = page.locator(selector).first
250
252
  await element.wait_for(state="visible", timeout=timeout)
251
253
 
252
254
  # Add highlight style
@@ -277,7 +279,7 @@ async def clear_highlights() -> Dict[str, Any]:
277
279
  message_group=group_id,
278
280
  )
279
281
  try:
280
- browser_manager = get_camoufox_manager()
282
+ browser_manager = get_session_browser_manager()
281
283
  page = await browser_manager.get_current_page()
282
284
 
283
285
  if not page: