PyPI - code-puppy - Versions diffs - 0.0.348__py3-none-any.whl → 0.0.361__py3-none-any.whl - Mend

code-puppy 0.0.348__py3-none-any.whl → 0.0.361__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

code_puppy/agents/__init__.py +2 -0
code_puppy/agents/agent_manager.py +49 -0
code_puppy/agents/agent_pack_leader.py +383 -0
code_puppy/agents/agent_qa_kitten.py +12 -7
code_puppy/agents/agent_terminal_qa.py +323 -0
code_puppy/agents/base_agent.py +17 -4
code_puppy/agents/event_stream_handler.py +101 -8
code_puppy/agents/pack/__init__.py +34 -0
code_puppy/agents/pack/bloodhound.py +304 -0
code_puppy/agents/pack/husky.py +321 -0
code_puppy/agents/pack/retriever.py +393 -0
code_puppy/agents/pack/shepherd.py +348 -0
code_puppy/agents/pack/terrier.py +287 -0
code_puppy/agents/pack/watchdog.py +367 -0
code_puppy/agents/subagent_stream_handler.py +276 -0
code_puppy/api/__init__.py +13 -0
code_puppy/api/app.py +169 -0
code_puppy/api/main.py +21 -0
code_puppy/api/pty_manager.py +446 -0
code_puppy/api/routers/__init__.py +12 -0
code_puppy/api/routers/agents.py +36 -0
code_puppy/api/routers/commands.py +217 -0
code_puppy/api/routers/config.py +74 -0
code_puppy/api/routers/sessions.py +232 -0
code_puppy/api/templates/terminal.html +361 -0
code_puppy/api/websocket.py +154 -0
code_puppy/callbacks.py +73 -0
code_puppy/claude_cache_client.py +249 -34
code_puppy/command_line/core_commands.py +85 -0
code_puppy/config.py +66 -62
code_puppy/messaging/__init__.py +15 -0
code_puppy/messaging/messages.py +27 -0
code_puppy/messaging/queue_console.py +1 -1
code_puppy/messaging/rich_renderer.py +36 -1
code_puppy/messaging/spinner/__init__.py +20 -2
code_puppy/messaging/subagent_console.py +461 -0
code_puppy/model_utils.py +54 -0
code_puppy/plugins/antigravity_oauth/antigravity_model.py +90 -19
code_puppy/plugins/antigravity_oauth/transport.py +1 -0
code_puppy/plugins/frontend_emitter/__init__.py +25 -0
code_puppy/plugins/frontend_emitter/emitter.py +121 -0
code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
code_puppy/prompts/antigravity_system_prompt.md +1 -0
code_puppy/status_display.py +6 -2
code_puppy/tools/__init__.py +37 -1
code_puppy/tools/agent_tools.py +83 -33
code_puppy/tools/browser/__init__.py +37 -0
code_puppy/tools/browser/browser_control.py +6 -6
code_puppy/tools/browser/browser_interactions.py +21 -20
code_puppy/tools/browser/browser_locators.py +9 -9
code_puppy/tools/browser/browser_navigation.py +7 -7
code_puppy/tools/browser/browser_screenshot.py +78 -140
code_puppy/tools/browser/browser_scripts.py +15 -13
code_puppy/tools/browser/camoufox_manager.py +226 -64
code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
code_puppy/tools/browser/terminal_command_tools.py +521 -0
code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
code_puppy/tools/browser/terminal_tools.py +525 -0
code_puppy/tools/command_runner.py +292 -101
code_puppy/tools/common.py +176 -1
code_puppy/tools/display.py +84 -0
code_puppy/tools/subagent_context.py +158 -0
{code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/METADATA +13 -11
{code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/RECORD +69 -38
code_puppy/tools/browser/vqa_agent.py +0 -90
{code_puppy-0.0.348.data → code_puppy-0.0.361.data}/data/code_puppy/models.json +0 -0
{code_puppy-0.0.348.data → code_puppy-0.0.361.data}/data/code_puppy/models_dev_api.json +0 -0
{code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/WHEEL +0 -0
{code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/entry_points.txt +0 -0
{code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/licenses/LICENSE +0 -0

code_puppy/tools/browser/browser_screenshot.py CHANGED Viewed

@@ -1,19 +1,21 @@
-"""Screenshot and visual analysis tool with VQA capabilities."""
+"""Screenshot tool for browser automation.
-import asyncio
+Captures screenshots and returns them via ToolReturn with BinaryContent
+so multimodal models can directly see and analyze - no separate VQA agent needed.
+"""
+import time
 from datetime import datetime
 from pathlib import Path
 from tempfile import gettempdir, mkdtemp
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
-from pydantic import BaseModel
-from pydantic_ai import RunContext
+from pydantic_ai import BinaryContent, RunContext, ToolReturn
 from code_puppy.messaging import emit_error, emit_info, emit_success
 from code_puppy.tools.common import generate_group_id
-from .camoufox_manager import get_camoufox_manager
-from .vqa_agent import run_vqa_analysis
+from .camoufox_manager import get_session_browser_manager
 _TEMP_SCREENSHOT_ROOT = Path(
     mkdtemp(prefix="code_puppy_screenshots_", dir=gettempdir())
@@ -21,21 +23,11 @@ _TEMP_SCREENSHOT_ROOT = Path(
 def _build_screenshot_path(timestamp: str) -> Path:
-    """Return the target path for a screenshot using a shared temp directory."""
+    """Return the target path for a screenshot."""
     filename = f"screenshot_{timestamp}.png"
     return _TEMP_SCREENSHOT_ROOT / filename
-class ScreenshotResult(BaseModel):
-    """Result from screenshot operation."""
-    success: bool
-    screenshot_path: Optional[str] = None
-    screenshot_data: Optional[bytes] = None
-    timestamp: Optional[str] = None
-    error: Optional[str] = None
 async def _capture_screenshot(
     page,
     full_page: bool = False,
@@ -45,41 +37,37 @@ async def _capture_screenshot(
 ) -> Dict[str, Any]:
     """Internal screenshot capture function."""
     try:
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
         # Take screenshot
         if element_selector:
-            # Screenshot specific element
             element = await page.locator(element_selector).first
             if not await element.is_visible():
                 return {
                     "success": False,
                     "error": f"Element '{element_selector}' is not visible",
                 }
-            screenshot_data = await element.screenshot()
+            screenshot_bytes = await element.screenshot()
         else:
-            # Screenshot page or full page
-            screenshot_data = await page.screenshot(full_page=full_page)
+            screenshot_bytes = await page.screenshot(full_page=full_page)
-        result = {
+        result: Dict[str, Any] = {
             "success": True,
-            "screenshot_data": screenshot_data,
+            "screenshot_bytes": screenshot_bytes,
             "timestamp": timestamp,
         }
         if save_screenshot:
             screenshot_path = _build_screenshot_path(timestamp)
             screenshot_path.parent.mkdir(parents=True, exist_ok=True)
             with open(screenshot_path, "wb") as f:
-                f.write(screenshot_data)
+                f.write(screenshot_bytes)
             result["screenshot_path"] = str(screenshot_path)
-            message = f"Screenshot saved: {screenshot_path}"
             if group_id:
-                emit_success(message, message_group=group_id)
-            else:
-                emit_success(message)
+                emit_success(
+                    f"Screenshot saved: {screenshot_path}", message_group=group_id
+                )
         return result
@@ -87,46 +75,42 @@ async def _capture_screenshot(
         return {"success": False, "error": str(e)}
-async def take_screenshot_and_analyze(
-    question: str,
+async def take_screenshot(
     full_page: bool = False,
     element_selector: Optional[str] = None,
     save_screenshot: bool = True,
-) -> Dict[str, Any]:
-    """
-    Take a screenshot and analyze it using visual understanding.
+) -> Union[ToolReturn, Dict[str, Any]]:
+    """Take a screenshot of the browser page.
+    Returns a ToolReturn with BinaryContent so multimodal models can
+    directly see and analyze the screenshot.
     Args:
-        question: The specific question to ask about the screenshot
-        full_page: Whether to capture the full page or just viewport
-        element_selector: Optional selector to screenshot just a specific element
-        save_screenshot: Whether to save the screenshot to disk
+        full_page: Whether to capture full page or just viewport.
+        element_selector: Optional selector to screenshot specific element.
+        save_screenshot: Whether to save the screenshot to disk.
     Returns:
-        Dict containing analysis results and screenshot info
+        ToolReturn containing:
+            - return_value: Success message with screenshot path
+            - content: List with description and BinaryContent image
+            - metadata: Screenshot details (path, target, timestamp)
+        Or Dict with error info if failed.
     """
     target = element_selector or ("full_page" if full_page else "viewport")
-    group_id = generate_group_id(
-        "browser_screenshot_analyze", f"{question[:50]}_{target}"
-    )
-    emit_info(
-        f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
-        message_group=group_id,
-    )
+    group_id = generate_group_id("browser_screenshot", target)
+    emit_info(f"BROWSER SCREENSHOT 📷 target={target}", message_group=group_id)
     try:
-        # Get the current browser page
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
-            return {
-                "success": False,
-                "error": "No active browser page available. Please navigate to a webpage first.",
-                "question": question,
-            }
+            error_msg = "No active browser page. Navigate to a webpage first."
+            emit_error(error_msg, message_group=group_id)
+            return {"success": False, "error": error_msg}
-        # Take screenshot
-        screenshot_result = await _capture_screenshot(
+        result = await _capture_screenshot(
             page,
             full_page=full_page,
             element_selector=element_selector,
@@ -134,108 +118,62 @@ async def take_screenshot_and_analyze(
             group_id=group_id,
         )
-        if not screenshot_result["success"]:
-            error_message = screenshot_result.get("error", "Screenshot failed")
-            emit_error(
-                f"Screenshot capture failed: {error_message}",
-                message_group=group_id,
-            )
-            return {
-                "success": False,
-                "error": error_message,
-                "question": question,
-            }
-        screenshot_bytes = screenshot_result.get("screenshot_data")
-        if not screenshot_bytes:
-            emit_error(
-                "Screenshot captured but pixel data missing; cannot run visual analysis.",
-                message_group=group_id,
-            )
-            return {
-                "success": False,
-                "error": "Screenshot captured but no image bytes available for analysis.",
-                "question": question,
-            }
-        try:
-            vqa_result = await asyncio.to_thread(
-                run_vqa_analysis,
-                question,
-                screenshot_bytes,
-            )
-        except Exception as exc:
-            emit_error(
-                f"Visual question answering failed: {exc}",
-                message_group=group_id,
-            )
-            return {
-                "success": False,
-                "error": f"Visual analysis failed: {exc}",
-                "question": question,
-                "screenshot_info": {
-                    "path": screenshot_result.get("screenshot_path"),
-                    "timestamp": screenshot_result.get("timestamp"),
-                    "full_page": full_page,
-                    "element_selector": element_selector,
-                },
-            }
-        emit_success(
-            f"Visual analysis answer: {vqa_result.answer}",
-            message_group=group_id,
-        )
-        emit_info(
-            f"Observations: {vqa_result.observations}",
-            message_group=group_id,
-        )
-        return {
-            "success": True,
-            "question": question,
-            "answer": vqa_result.answer,
-            "confidence": vqa_result.confidence,
-            "observations": vqa_result.observations,
-            "screenshot_info": {
-                "path": screenshot_result.get("screenshot_path"),
-                "size": len(screenshot_bytes),
-                "timestamp": screenshot_result.get("timestamp"),
+        if not result["success"]:
+            emit_error(result.get("error", "Screenshot failed"), message_group=group_id)
+            return {"success": False, "error": result.get("error")}
+        screenshot_path = result.get("screenshot_path", "(not saved)")
+        # Return as ToolReturn with BinaryContent so the model can SEE the image!
+        return ToolReturn(
+            return_value=f"Screenshot captured successfully. Saved to: {screenshot_path}",
+            content=[
+                f"Here's the browser screenshot ({target}):",
+                BinaryContent(
+                    data=result["screenshot_bytes"],
+                    media_type="image/png",
+                ),
+                "Please analyze what you see and describe any relevant details.",
+            ],
+            metadata={
+                "success": True,
+                "screenshot_path": screenshot_path,
+                "target": target,
                 "full_page": full_page,
                 "element_selector": element_selector,
+                "timestamp": time.time(),
             },
-        }
+        )
     except Exception as e:
-        emit_error(f"Screenshot analysis failed: {str(e)}", message_group=group_id)
-        return {"success": False, "error": str(e), "question": question}
+        error_msg = f"Screenshot failed: {str(e)}"
+        emit_error(error_msg, message_group=group_id)
+        return {"success": False, "error": error_msg}
 def register_take_screenshot_and_analyze(agent):
-    """Register the screenshot analysis tool."""
+    """Register the screenshot tool."""
     @agent.tool
     async def browser_screenshot_analyze(
         context: RunContext,
-        question: str,
         full_page: bool = False,
         element_selector: Optional[str] = None,
-        save_screenshot: bool = True,
-    ) -> Dict[str, Any]:
+    ) -> Union[ToolReturn, Dict[str, Any]]:
         """
-        Take a screenshot and analyze it to answer a specific question.
+        Take a screenshot of the browser page.
+        Returns the screenshot via ToolReturn with BinaryContent that you can
+        see directly. Use this to see what's displayed in the browser.
         Args:
-            question: The specific question to ask about the screenshot
-            full_page: Whether to capture the full page or just viewport
-            element_selector: Optional CSS/XPath selector to screenshot specific element
-            save_screenshot: Whether to save the screenshot to disk
+            full_page: Capture full page (True) or just viewport (False).
+            element_selector: Optional CSS selector to screenshot specific element.
         Returns:
-            Dict with analysis results including answer, confidence, and observations
+            ToolReturn with the screenshot image you can analyze, or error dict.
         """
-        return await take_screenshot_and_analyze(
-            question=question,
+        return await take_screenshot(
             full_page=full_page,
             element_selector=element_selector,
-            save_screenshot=save_screenshot,
         )

code_puppy/tools/browser/browser_scripts.py CHANGED Viewed

@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
 from code_puppy.messaging import emit_error, emit_info, emit_success
 from code_puppy.tools.common import generate_group_id
-from .camoufox_manager import get_camoufox_manager
+from .camoufox_manager import get_session_browser_manager
 async def execute_javascript(
@@ -21,14 +21,16 @@ async def execute_javascript(
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
             return {"success": False, "error": "No active browser page available"}
         # Execute JavaScript
-        result = await page.evaluate(script, timeout=timeout)
+        # Note: page.evaluate() does NOT accept a timeout parameter
+        # The timeout arg to this function is kept for API compatibility but unused
+        result = await page.evaluate(script)
         emit_success("JavaScript executed successfully", message_group=group_id)
@@ -52,7 +54,7 @@ async def scroll_page(
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
@@ -60,7 +62,7 @@ async def scroll_page(
         if element_selector:
             # Scroll specific element
-            element = page.locator(element_selector)
+            element = page.locator(element_selector).first
             await element.scroll_into_view_if_needed()
             # Get element's current scroll position and dimensions
@@ -146,13 +148,13 @@ async def scroll_to_element(
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
             return {"success": False, "error": "No active browser page available"}
-        element = page.locator(selector)
+        element = page.locator(selector).first
         await element.wait_for(state="attached", timeout=timeout)
         await element.scroll_into_view_if_needed()
@@ -178,7 +180,7 @@ async def set_viewport_size(
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
@@ -209,13 +211,13 @@ async def wait_for_element(
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
             return {"success": False, "error": "No active browser page available"}
-        element = page.locator(selector)
+        element = page.locator(selector).first
         await element.wait_for(state=state, timeout=timeout)
         emit_success(f"Element {selector} is now {state}", message_group=group_id)
@@ -240,13 +242,13 @@ async def highlight_element(
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:
             return {"success": False, "error": "No active browser page available"}
-        element = page.locator(selector)
+        element = page.locator(selector).first
         await element.wait_for(state="visible", timeout=timeout)
         # Add highlight style
@@ -277,7 +279,7 @@ async def clear_highlights() -> Dict[str, Any]:
         message_group=group_id,
     )
     try:
-        browser_manager = get_camoufox_manager()
+        browser_manager = get_session_browser_manager()
         page = await browser_manager.get_current_page()
         if not page:

code-puppy 0.0.348__py3-none-any.whl → 0.0.361__py3-none-any.whl

code-puppy 0.0.348__py3-none-any.whl → 0.0.361__py3-none-any.whl