code-puppy 0.0.177__py3-none-any.whl → 0.0.179__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,251 +0,0 @@
1
- """Browser navigation and control tools."""
2
-
3
- from typing import Any, Dict
4
-
5
- from pydantic_ai import RunContext
6
-
7
- from code_puppy.messaging import emit_info
8
- from code_puppy.tools.common import generate_group_id
9
-
10
- from .camoufox_manager import get_camoufox_manager
11
-
12
-
13
async def navigate_to_url(url: str) -> Dict[str, Any]:
    """Point the current browser page at ``url`` and report where it ended up.

    Args:
        url: Address passed straight to ``page.goto``.

    Returns:
        On success a dict with ``success``, the page's ``url`` after the
        navigation, its ``title`` and the ``requested_url``. Otherwise a dict
        with ``success=False`` and an ``error`` message (plus the requested
        ``url`` when an exception was raised).
    """
    message_group = generate_group_id("browser_navigate", url)
    emit_info(
        f"[bold white on blue] BROWSER NAVIGATE [/bold white on blue] 🌐 {url}",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            await current_page.goto(url, wait_until="domcontentloaded", timeout=30000)

            # Read the URL back from the page so redirects are reflected.
            landed_url = current_page.url
            page_title = await current_page.title()

            emit_info(
                f"[green]Navigated to: {landed_url}[/green]",
                message_group=message_group,
            )
            return {
                "success": True,
                "url": landed_url,
                "title": page_title,
                "requested_url": url,
            }

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        failure = str(exc)
        emit_info(f"[red]Navigation failed: {failure}[/red]", message_group=message_group)
        return {"success": False, "error": failure, "url": url}
41
-
42
-
43
async def get_page_info() -> Dict[str, Any]:
    """Report the URL and title of the page the browser manager currently holds.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` when a page is
        available, otherwise ``{"success": False, "error": ...}``.
    """
    message_group = generate_group_id("browser_get_page_info")
    emit_info(
        "[bold white on blue] BROWSER GET PAGE INFO [/bold white on blue] 📌",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            current_url = current_page.url
            current_title = await current_page.title()
            return {"success": True, "url": current_url, "title": current_title}

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        # Any failure is reported to the caller as data rather than raised.
        return {"success": False, "error": str(exc)}
64
-
65
-
66
async def go_back() -> Dict[str, Any]:
    """Step one entry back in the current page's history.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` read from the page
        after ``page.go_back`` completes, or ``{"success": False, "error": ...}``
        when no page is available or an exception was raised.
    """
    message_group = generate_group_id("browser_go_back")
    emit_info(
        "[bold white on blue] BROWSER GO BACK [/bold white on blue] ⬅️",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            await current_page.go_back(wait_until="domcontentloaded")

            url_after = current_page.url
            title_after = await current_page.title()
            return {"success": True, "url": url_after, "title": title_after}

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
86
-
87
-
88
async def go_forward() -> Dict[str, Any]:
    """Step one entry forward in the current page's history.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` read from the page
        after ``page.go_forward`` completes, or
        ``{"success": False, "error": ...}`` when no page is available or an
        exception was raised.
    """
    message_group = generate_group_id("browser_go_forward")
    emit_info(
        "[bold white on blue] BROWSER GO FORWARD [/bold white on blue] ➡️",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            await current_page.go_forward(wait_until="domcontentloaded")

            url_after = current_page.url
            title_after = await current_page.title()
            return {"success": True, "url": url_after, "title": title_after}

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
108
-
109
-
110
async def reload_page(wait_until: str = "domcontentloaded") -> Dict[str, Any]:
    """Reload the page the browser manager currently holds.

    Args:
        wait_until: Value forwarded unchanged to ``page.reload(wait_until=...)``.

    Returns:
        ``{"success": True, "url": ..., "title": ...}`` read after the reload,
        or ``{"success": False, "error": ...}`` when no page is available or
        an exception was raised.
    """
    message_group = generate_group_id("browser_reload", wait_until)
    emit_info(
        f"[bold white on blue] BROWSER RELOAD [/bold white on blue] 🔄 wait_until={wait_until}",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            await current_page.reload(wait_until=wait_until)

            url_after = current_page.url
            title_after = await current_page.title()
            return {"success": True, "url": url_after, "title": title_after}

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
130
-
131
-
132
async def wait_for_load_state(
    state: str = "domcontentloaded", timeout: int = 30000
) -> Dict[str, Any]:
    """Block until the current page reports the requested load state.

    Args:
        state: Load state name forwarded to ``page.wait_for_load_state``.
        timeout: Timeout forwarded to the same call; the status message
            labels it as milliseconds.

    Returns:
        ``{"success": True, "state": ..., "url": ...}`` once the wait
        returns, ``{"success": False, "error": ...}`` when no page is
        available, or ``{"success": False, "error": ..., "state": ...}`` when
        an exception was raised.
    """
    message_group = generate_group_id("browser_wait_for_load", f"{state}_{timeout}")
    emit_info(
        f"[bold white on blue] BROWSER WAIT FOR LOAD [/bold white on blue] ⏱️ state={state} timeout={timeout}ms",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            await current_page.wait_for_load_state(state, timeout=timeout)
            return {"success": True, "state": state, "url": current_page.url}

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        return {"success": False, "error": str(exc), "state": state}
154
-
155
-
156
def register_navigate_to_url(agent):
    """Attach the ``browser_navigate`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and forwards its ``url``
    argument to ``navigate_to_url``.
    """

    @agent.tool
    async def browser_navigate(context: RunContext, url: str) -> Dict[str, Any]:
        """
        Navigate the browser to a specific URL.

        Args:
            url: The URL to navigate to (must include protocol like https://)

        Returns:
            Dict with navigation results including final URL and page title
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await navigate_to_url(url)
        return outcome
171
-
172
-
173
def register_get_page_info(agent):
    """Attach the ``browser_get_page_info`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and delegates to
    ``get_page_info``.
    """

    @agent.tool
    async def browser_get_page_info(context: RunContext) -> Dict[str, Any]:
        """
        Get information about the current page.

        Returns:
            Dict with current URL and page title
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await get_page_info()
        return outcome
185
-
186
-
187
def register_browser_go_back(agent):
    """Attach the ``browser_go_back`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and delegates to ``go_back``.
    """

    @agent.tool
    async def browser_go_back(context: RunContext) -> Dict[str, Any]:
        """
        Navigate back in browser history.

        Returns:
            Dict with navigation results
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await go_back()
        return outcome
199
-
200
-
201
def register_browser_go_forward(agent):
    """Attach the ``browser_go_forward`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and delegates to
    ``go_forward``.
    """

    @agent.tool
    async def browser_go_forward(context: RunContext) -> Dict[str, Any]:
        """
        Navigate forward in browser history.

        Returns:
            Dict with navigation results
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await go_forward()
        return outcome
213
-
214
-
215
def register_reload_page(agent):
    """Attach the ``browser_reload`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and forwards ``wait_until``
    to ``reload_page``.
    """

    @agent.tool
    async def browser_reload(
        context: RunContext, wait_until: str = "domcontentloaded"
    ) -> Dict[str, Any]:
        """
        Reload the current page.

        Args:
            wait_until: Load state to wait for (networkidle, domcontentloaded, load)

        Returns:
            Dict with reload results
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await reload_page(wait_until)
        return outcome
232
-
233
-
234
def register_wait_for_load_state(agent):
    """Attach the ``browser_wait_for_load`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and forwards ``state`` and
    ``timeout`` to ``wait_for_load_state``.
    """

    @agent.tool
    async def browser_wait_for_load(
        context: RunContext, state: str = "domcontentloaded", timeout: int = 30000
    ) -> Dict[str, Any]:
        """
        Wait for the page to reach a specific load state.

        Args:
            state: Load state to wait for (networkidle, domcontentloaded, load)
            timeout: Timeout in milliseconds

        Returns:
            Dict with wait results
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await wait_for_load_state(state, timeout)
        return outcome
@@ -1,278 +0,0 @@
1
- """Screenshot and visual analysis tool with VQA capabilities."""
2
-
3
- from datetime import datetime
4
- from pathlib import Path
5
- from typing import Any, Dict, Optional
6
-
7
- from pydantic import BaseModel
8
- from pydantic_ai import RunContext
9
-
10
- from code_puppy.messaging import emit_info
11
- from code_puppy.tools.common import generate_group_id
12
-
13
- from .camoufox_manager import get_camoufox_manager
14
-
15
-
16
# Pydantic model for a visual-analysis outcome. Its field names match the
# "answer" / "confidence" / "observations" keys of the dict returned by
# take_screenshot_and_analyze; the model itself is not referenced elsewhere in
# the visible part of this module.
class VisualAnalysisResult(BaseModel):
    """Result from visual analysis."""

    # Textual answer to the question asked about the screenshot.
    answer: str
    # Confidence score; presumably in the range 0.0-1.0 (the only value
    # produced in the visible code is 1.0) — TODO confirm the intended scale.
    confidence: float
    # Free-text remarks accompanying the answer.
    observations: str
22
-
23
-
24
# Pydantic model for a screenshot outcome. Its field names match the keys of
# the dict built by _capture_screenshot; the model itself is not referenced
# elsewhere in the visible part of this module.
class ScreenshotResult(BaseModel):
    """Result from screenshot operation."""

    # Whether the capture succeeded.
    success: bool
    # Path of the saved image file, when one was written.
    screenshot_path: Optional[str] = None
    # Raw image bytes of the capture.
    screenshot_data: Optional[bytes] = None
    # Capture time; _capture_screenshot formats it as "%Y%m%d_%H%M%S".
    timestamp: Optional[str] = None
    # Error message when the capture failed.
    error: Optional[str] = None
32
-
33
-
34
async def _capture_screenshot(
    page,
    full_page: bool = False,
    element_selector: Optional[str] = None,
    save_screenshot: bool = True,
    group_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Capture a screenshot of ``page`` (or one element on it).

    Args:
        page: Browser page object exposing ``locator(...)`` and an async
            ``screenshot(...)`` (presumably a Playwright page — confirm
            against the browser manager).
        full_page: Forwarded to ``page.screenshot(full_page=...)``; ignored
            when ``element_selector`` is given.
        element_selector: When set, only the first element matching this
            selector is captured.
        save_screenshot: When true, the image is also written to
            ``screenshots/screenshot_<timestamp>.png`` relative to the
            current working directory.
        group_id: Optional message group for the "saved" status message.

    Returns:
        ``{"success": True, "screenshot_data": ..., "timestamp": ...}`` plus
        ``"screenshot_path"`` when the image was saved, or
        ``{"success": False, "error": ...}`` when the element is not visible
        or any exception was raised.
    """
    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if element_selector:
            # ``first`` is a plain property that yields another locator, not
            # an awaitable; awaiting it raised TypeError and made every
            # element-scoped screenshot fail.
            element = page.locator(element_selector).first
            if not await element.is_visible():
                return {
                    "success": False,
                    "error": f"Element '{element_selector}' is not visible",
                }
            screenshot_data = await element.screenshot()
        else:
            # Viewport or full-page capture, depending on ``full_page``.
            screenshot_data = await page.screenshot(full_page=full_page)

        result = {
            "success": True,
            "screenshot_data": screenshot_data,
            "timestamp": timestamp,
        }

        if save_screenshot:
            screenshot_dir = Path("screenshots")
            screenshot_dir.mkdir(exist_ok=True)

            screenshot_path = screenshot_dir / f"screenshot_{timestamp}.png"
            screenshot_path.write_bytes(screenshot_data)

            result["screenshot_path"] = str(screenshot_path)
            if group_id:
                emit_info(
                    f"[green]Screenshot saved: {screenshot_path}[/green]",
                    message_group=group_id,
                )
            else:
                emit_info(f"[green]Screenshot saved: {screenshot_path}[/green]")

        return result

    except Exception as e:
        # Failures are reported to the caller as data rather than raised.
        return {"success": False, "error": str(e)}
89
-
90
-
91
async def take_screenshot_and_analyze(
    question: str,
    full_page: bool = False,
    element_selector: Optional[str] = None,
    save_screenshot: bool = True,
) -> Dict[str, Any]:
    """
    Take a screenshot and return a placeholder analysis for ``question``.

    No visual question answering is performed yet: the ``answer`` is a fixed
    string and ``confidence`` is always 1.0.

    Args:
        question: The specific question to ask about the screenshot
        full_page: Whether to capture the full page or just viewport
        element_selector: Optional selector to screenshot just a specific element
        save_screenshot: Whether to save the screenshot to disk

    Returns:
        On success a dict with ``success``, ``question``, ``answer``,
        ``confidence``, ``observations`` and ``screenshot_info`` (path, byte
        size, timestamp, ``full_page``, ``element_selector``). On failure a
        dict with ``success=False``, ``error`` and ``question``.
    """
    target = element_selector or ("full_page" if full_page else "viewport")
    group_id = generate_group_id(
        "browser_screenshot_analyze", f"{question[:50]}_{target}"
    )
    # Only the first 100 characters of the question are shown in the banner.
    preview = question[:100] + ("..." if len(question) > 100 else "")
    emit_info(
        f"[bold white on blue] BROWSER SCREENSHOT ANALYZE [/bold white on blue] 📷 question='{preview}' target={target}",
        message_group=group_id,
    )
    try:
        browser_manager = get_camoufox_manager()
        page = await browser_manager.get_current_page()

        if not page:
            return {
                "success": False,
                "error": "No active browser page available. Please navigate to a webpage first.",
                "question": question,
            }

        screenshot_result = await _capture_screenshot(
            page,
            full_page=full_page,
            element_selector=element_selector,
            save_screenshot=save_screenshot,
            group_id=group_id,
        )

        if not screenshot_result["success"]:
            return {
                "success": False,
                "error": screenshot_result.get("error", "Screenshot failed"),
                "question": question,
            }

        # For now, return screenshot info without VQA analysis;
        # VQA would require integration with vision models.
        emit_info(
            f"[yellow]Screenshot captured for question: {question}[/yellow]",
            message_group=group_id,
        )
        # Keep the note in the same message group as the rest of this call's
        # output (it was previously emitted ungrouped).
        emit_info(
            "[dim]Note: Visual question answering requires vision model integration[/dim]",
            message_group=group_id,
        )

        saved_path = screenshot_result.get("screenshot_path")
        image_bytes = screenshot_result.get("screenshot_data")

        # Only claim the image was saved when a file was actually written;
        # with save_screenshot=False no path is produced.
        if saved_path:
            observations = "Screenshot taken and saved to disk."
        else:
            observations = "Screenshot taken (not saved to disk)."

        return {
            "success": True,
            "question": question,
            "answer": "Screenshot captured successfully. Visual analysis requires vision model integration.",
            "confidence": 1.0,
            "observations": observations,
            "screenshot_info": {
                "path": saved_path,
                "size": len(image_bytes) if image_bytes else 0,
                "timestamp": screenshot_result.get("timestamp"),
                "full_page": full_page,
                "element_selector": element_selector,
            },
        }

    except Exception as e:
        emit_info(
            f"[red]Screenshot analysis failed: {str(e)}[/red]", message_group=group_id
        )
        return {"success": False, "error": str(e), "question": question}
177
-
178
-
179
async def simple_screenshot(
    full_page: bool = False,
    element_selector: Optional[str] = None,
    save_screenshot: bool = True,
) -> Dict[str, Any]:
    """
    Capture a screenshot of the current page with no analysis step.

    Args:
        full_page: Whether to capture the full page or just viewport
        element_selector: Optional selector to screenshot just a specific element
        save_screenshot: Whether to save the screenshot to disk

    Returns:
        The dict produced by ``_capture_screenshot``, or
        ``{"success": False, "error": ...}`` when no page is available or an
        exception was raised.
    """
    if element_selector:
        target = element_selector
    elif full_page:
        target = "full_page"
    else:
        target = "viewport"

    message_group = generate_group_id("browser_screenshot", target)
    emit_info(
        f"[bold white on blue] BROWSER SCREENSHOT [/bold white on blue] 📷 target={target} save={save_screenshot}",
        message_group=message_group,
    )
    try:
        manager = get_camoufox_manager()
        current_page = await manager.get_current_page()

        if current_page:
            return await _capture_screenshot(
                current_page,
                full_page=full_page,
                element_selector=element_selector,
                save_screenshot=save_screenshot,
                group_id=message_group,
            )

        return {"success": False, "error": "No active browser page available"}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
220
-
221
-
222
def register_take_screenshot_and_analyze(agent):
    """Attach the ``browser_screenshot_analyze`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and forwards all of its
    arguments by keyword to ``take_screenshot_and_analyze``.
    """

    @agent.tool
    async def browser_screenshot_analyze(
        context: RunContext,
        question: str,
        full_page: bool = False,
        element_selector: Optional[str] = None,
        save_screenshot: bool = True,
    ) -> Dict[str, Any]:
        """
        Take a screenshot and analyze it to answer a specific question.

        Args:
            question: The specific question to ask about the screenshot
            full_page: Whether to capture the full page or just viewport
            element_selector: Optional CSS/XPath selector to screenshot specific element
            save_screenshot: Whether to save the screenshot to disk

        Returns:
            Dict with analysis results including answer, confidence, and observations
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await take_screenshot_and_analyze(
            question=question,
            full_page=full_page,
            element_selector=element_selector,
            save_screenshot=save_screenshot,
        )
        return outcome
251
-
252
-
253
def register_simple_screenshot(agent):
    """Attach the ``browser_simple_screenshot`` tool to ``agent``.

    The tool is declared through ``@agent.tool`` and forwards all of its
    arguments by keyword to ``simple_screenshot``.
    """

    @agent.tool
    async def browser_simple_screenshot(
        context: RunContext,
        full_page: bool = False,
        element_selector: Optional[str] = None,
        save_screenshot: bool = True,
    ) -> Dict[str, Any]:
        """
        Take a simple screenshot without analysis.

        Args:
            full_page: Whether to capture the full page or just viewport
            element_selector: Optional CSS/XPath selector to screenshot specific element
            save_screenshot: Whether to save the screenshot to disk

        Returns:
            Dict with screenshot info including path and metadata
        """
        # ``context`` is part of the tool signature but is not used here.
        outcome = await simple_screenshot(
            full_page=full_page,
            element_selector=element_selector,
            save_screenshot=save_screenshot,
        )
        return outcome