PyPI - code-puppy - Versions diffs - 0.0.359__py3-none-any.whl → 0.0.360__py3-none-any.whl - Mend

code-puppy 0.0.359__py3-none-any.whl → 0.0.360__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

code_puppy/agents/agent_qa_kitten.py CHANGED Viewed

@@ -16,7 +16,7 @@ class QualityAssuranceKittenAgent(BaseAgent):
     @property
     def description(self) -> str:
-        return "Advanced web browser automation and quality assurance testing using Playwright with VQA capabilities"
+        return "Advanced web browser automation and quality assurance testing using Playwright with visual analysis capabilities"
     def get_available_tools(self) -> list[str]:
         """Get the list of tools available to Web Browser Puppy."""
@@ -63,8 +63,9 @@ class QualityAssuranceKittenAgent(BaseAgent):
             "browser_wait_for_element",
             "browser_highlight_element",
             "browser_clear_highlights",
-            # Screenshots and VQA (uses dedicated VQA agent for context management)
-            "browser_screenshot_vqa",
+            # Screenshots (returns BinaryContent for direct visual analysis)
+            "browser_screenshot_analyze",
+            "load_image_for_analysis",
             # Workflow management
             "browser_save_workflow",
             "browser_list_workflows",
@@ -78,7 +79,7 @@ You are Quality Assurance Kitten 🐱, an advanced autonomous browser automation
 You specialize in:
 🎯 **Quality Assurance Testing** - automated testing of web applications and user workflows
-👁️ **Visual verification** - taking screenshots and analyzing page content for bugs
+👁️ **Visual verification** - taking screenshots you can directly see and analyze for bugs
 🔍 **Element discovery** - finding elements using semantic locators and accessibility best practices
 📝 **Data extraction** - scraping content and gathering information from web pages
 🧪 **Web automation** - filling forms, clicking buttons, navigating sites with precision
@@ -117,12 +118,10 @@ For any browser task, follow this approach:
 ### Visual Verification Workflow
 - **Before critical actions**: Use browser_highlight_element to visually confirm
-- **After interactions**: Use browser_screenshot_vqa to verify results
-- **Ask specific questions**: The VQA tool requires a question like:
-  - "Is the login button visible?"
-  - "What error message is displayed?"
-  - "Is the form filled out correctly?"
-  - "What is the main heading text?"
+- **After interactions**: Use browser_screenshot_analyze to verify results
+- The screenshot is returned directly as an image you can see and analyze
+- No need to ask questions - just analyze what you see in the returned image
+- Use load_image_for_analysis to load mockups or reference images for comparison
 ### Form Input Best Practices
 - **ALWAYS check current values** with browser_get_value before typing
@@ -135,14 +134,15 @@ For any browser task, follow this approach:
 **When Element Discovery Fails:**
 1. Try different semantic locators first
 2. Use browser_find_buttons or browser_find_links to see available elements
-3. Take a screenshot with browser_screenshot_analyze to understand the page layout
+3. Take a screenshot with browser_screenshot_analyze to see and understand the page layout
 4. Only use XPath as absolute last resort
 **When Page Interactions Fail:**
 1. Check if element is visible with browser_wait_for_element
 2. Scroll element into view with browser_scroll_to_element
 3. Use browser_highlight_element to confirm element location
-4. Try browser_execute_js for complex interactions
+4. Take a screenshot with browser_screenshot_analyze to see the actual page state
+5. Try browser_execute_js for complex interactions
 ### JavaScript Execution
 - Use browser_execute_js for:
@@ -187,7 +187,7 @@ For any browser task, follow this approach:
 ## Specialized Capabilities
 🌐 **WCAG 2.2 Level AA Compliance**: Always prioritize accessibility in element discovery
-📸 **Visual Question Answering**: Use browser_screenshot_vqa for intelligent page analysis (uses dedicated VQA agent)
+📸 **Direct Visual Analysis**: Use browser_screenshot_analyze to see and analyze page content directly
 🚀 **Semantic Web Navigation**: Prefer role-based and label-based element discovery
 ⚡ **Playwright Power**: Full access to modern browser automation capabilities
 📋 **Workflow Management**: Save, load, and reuse automation patterns for consistency

code_puppy/config.py CHANGED Viewed

@@ -98,7 +98,6 @@ _CURRENT_AUTOSAVE_ID: Optional[str] = None
 _model_validation_cache = {}
 _default_model_cache = None
 _default_vision_model_cache = None
-_default_vqa_model_cache = None
 def ensure_config_exists():
@@ -358,47 +357,6 @@ def _default_vision_model_from_models_json() -> str:
         return "gpt-4.1"
-def _default_vqa_model_from_models_json() -> str:
-    """Select a default VQA-capable model, preferring vision-ready options."""
-    global _default_vqa_model_cache
-    if _default_vqa_model_cache is not None:
-        return _default_vqa_model_cache
-    try:
-        from code_puppy.model_factory import ModelFactory
-        models_config = ModelFactory.load_config()
-        if models_config:
-            # Allow explicit VQA hints if present
-            for name, config in models_config.items():
-                if config.get("supports_vqa"):
-                    _default_vqa_model_cache = name
-                    return name
-            # Reuse multimodal heuristics before falling back to generic default
-            preferred_candidates = (
-                "gpt-4.1",
-                "gpt-4.1-mini",
-                "claude-4-0-sonnet",
-                "gemini-2.5-flash-preview-05-20",
-                "gpt-4.1-nano",
-            )
-            for candidate in preferred_candidates:
-                if candidate in models_config:
-                    _default_vqa_model_cache = candidate
-                    return candidate
-            _default_vqa_model_cache = _default_model_from_models_json()
-            return _default_vqa_model_cache
-        _default_vqa_model_cache = "gpt-4.1"
-        return "gpt-4.1"
-    except Exception:
-        _default_vqa_model_cache = "gpt-4.1"
-        return "gpt-4.1"
 def _validate_model_exists(model_name: str) -> bool:
     """Check if a model exists in models.json with caching to avoid redundant calls."""
     global _model_validation_cache
@@ -424,15 +382,10 @@ def _validate_model_exists(model_name: str) -> bool:
 def clear_model_cache():
     """Clear the model validation cache. Call this when models.json changes."""
-    global \
-        _model_validation_cache, \
-        _default_model_cache, \
-        _default_vision_model_cache, \
-        _default_vqa_model_cache
+    global _model_validation_cache, _default_model_cache, _default_vision_model_cache
     _model_validation_cache.clear()
     _default_model_cache = None
     _default_vision_model_cache = None
-    _default_vqa_model_cache = None
 def model_supports_setting(model_name: str, setting: str) -> bool:
@@ -503,21 +456,6 @@ def set_model_name(model: str):
     clear_model_cache()
-def get_vqa_model_name() -> str:
-    """Return the configured VQA model, falling back to the global model."""
-    stored_model = get_value("vqa_model_name")
-    if stored_model and _validate_model_exists(stored_model):
-        return stored_model
-    # Fall back to the global model if no specific VQA model is set
-    return get_global_model_name()
-def set_vqa_model_name(model: str):
-    """Persist the configured VQA model name and refresh caches."""
-    set_config_value("vqa_model_name", model or "")
-    clear_model_cache()
 def get_puppy_token():
     """Returns the puppy_token from config, or None if not set."""
     return get_value("puppy_token")

code_puppy/tools/__init__.py CHANGED Viewed

@@ -41,9 +41,6 @@ from code_puppy.tools.browser.browser_navigation import (
 from code_puppy.tools.browser.browser_screenshot import (
     register_take_screenshot_and_analyze,
 )
-from code_puppy.tools.browser.browser_screenshot_vqa import (
-    register_take_screenshot_and_analyze_vqa,
-)
 from code_puppy.tools.browser.browser_scripts import (
     register_browser_clear_highlights,
     register_browser_highlight_element,
@@ -146,9 +143,8 @@ TOOL_REGISTRY = {
     "browser_wait_for_element": register_wait_for_element,
     "browser_highlight_element": register_browser_highlight_element,
     "browser_clear_highlights": register_browser_clear_highlights,
-    # Browser Screenshots and VQA
+    # Browser Screenshots
     "browser_screenshot_analyze": register_take_screenshot_and_analyze,
-    "browser_screenshot_vqa": register_take_screenshot_and_analyze_vqa,
     # Browser Workflows
     "browser_save_workflow": register_save_workflow,
     "browser_list_workflows": register_list_workflows,

code_puppy/tools/browser/__init__.py CHANGED Viewed

@@ -11,7 +11,6 @@ from .camoufox_manager import (
     get_session_browser_manager,
     set_browser_session,
 )
-from .vqa_agent import VisualAnalysisResult, run_vqa_analysis, run_vqa_analysis_stream
 def format_terminal_banner(text: str) -> str:
@@ -35,7 +34,4 @@ __all__ = [
     "get_browser_session",
     "get_session_browser_manager",
     "set_browser_session",
-    "VisualAnalysisResult",
-    "run_vqa_analysis",
-    "run_vqa_analysis_stream",
 ]

{code_puppy-0.0.359.dist-info → code_puppy-0.0.360.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-puppy
-Version: 0.0.359
+Version: 0.0.360
 Summary: Code generation agent
 Project-URL: repository, https://github.com/mpfaffenberger/code_puppy
 Project-URL: HomePage, https://github.com/mpfaffenberger/code_puppy

{code_puppy-0.0.359.dist-info → code_puppy-0.0.360.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ code_puppy/callbacks.py,sha256=Pp0VyeXJBEtk-N_RSWr5pbveelovsdLUiJ4f11dzwGw,10775
 code_puppy/chatgpt_codex_client.py,sha256=Om0ANB_kpHubhCwNzF9ENf8RvKBqs0IYzBLl_SNw0Vk,9833
 code_puppy/claude_cache_client.py,sha256=Gl6um5ZaKpcnxOvoFSM8Lwm_Vu4-VyWz8Nli8DnRLa4,22508
 code_puppy/cli_runner.py,sha256=w5CLKgQYYaT7My3Cga2StXYol-u6DBxNzzUuhhsfhsA,34952
-code_puppy/config.py,sha256=gwOK-WDuYBzJKwyGCFALJiW0pstiA39pRDm1O1zFek4,54528
+code_puppy/config.py,sha256=z4c-rKwQOEsg13HHd1KskIQG4Ygdr9krQsCAiZU-Wa0,52441
 code_puppy/error_logging.py,sha256=a80OILCUtJhexI6a9GM-r5LqIdjvSRzggfgPp2jv1X0,3297
 code_puppy/gemini_code_assist.py,sha256=KGS7sO5OLc83nDF3xxS-QiU6vxW9vcm6hmzilu79Ef8,13867
 code_puppy/http_utils.py,sha256=H3N5Qz2B1CcsGUYOycGWAqoNMr2P1NCVluKX3aRwRqI,10358
@@ -38,7 +38,7 @@ code_puppy/agents/agent_planning.py,sha256=6q3s5qCko2FcUfaLzImOFNDi0H61WBc2PNtsO
 code_puppy/agents/agent_python_programmer.py,sha256=R-7XoGIFJ58EY9LE9mWGcQQ8gSsMzi-1HD6wigJQPL8,6846
 code_puppy/agents/agent_python_reviewer.py,sha256=J8lqzoKJlohs8NWMbgUpHXNt1bXHNIkuGjzLd9Af8qE,5854
 code_puppy/agents/agent_qa_expert.py,sha256=5Ikb4U3SZQknUEfwlHZiyZXKqnffnOTQagr_wrkUkPk,10125
-code_puppy/agents/agent_qa_kitten.py,sha256=bjQdAPL_VMjSDn012mHQgnduuQkGG0JeXuC3T1KrU6g,9372
+code_puppy/agents/agent_qa_kitten.py,sha256=qvry-1u_CiXi8eRueHTax4OtqsS_mQrtXHsbTXWzGYs,9517
 code_puppy/agents/agent_security_auditor.py,sha256=SpiYNA0XAsIwBj7S2_EQPRslRUmF_-b89pIJyW7DYtY,12022
 code_puppy/agents/agent_terminal_qa.py,sha256=U-iyP7OBWdAmchW_oUU8k6asH2aignTMmgqqYDyf-ms,10343
 code_puppy/agents/agent_typescript_reviewer.py,sha256=vsnpp98xg6cIoFAEJrRTUM_i4wLEWGm5nJxs6fhHobM,10275
@@ -184,7 +184,7 @@ code_puppy/plugins/shell_safety/command_cache.py,sha256=adYtSPNVOZfW_6dQdtEihO6E
 code_puppy/plugins/shell_safety/register_callbacks.py,sha256=W3v664RR48Fdbbbltf_NnX22_Ahw2AvAOtvXvWc7KxQ,7322
 code_puppy/prompts/antigravity_system_prompt.md,sha256=ZaTfRyY57ttROyZMmOBtqZQu1to7sdTNTv8_0fTgPNw,6807
 code_puppy/prompts/codex_system_prompt.md,sha256=hEFTCziroLqZmqNle5kG34A8kvTteOWezCiVrAEKhE0,24400
-code_puppy/tools/__init__.py,sha256=WC1DO3OeTVSibpvIIoyfdxbeeC0oigiBSUqpmdw8G4o,7615
+code_puppy/tools/__init__.py,sha256=9bzVIjX9CAr2YTZkhD7IWFYt4KpnFRx6ge_Tqazugbs,7425
 code_puppy/tools/agent_tools.py,sha256=XvBQ_IPa4NHLmIA2mdyPwy9GPlYGQwhtdn-w_3i239g,25517
 code_puppy/tools/command_runner.py,sha256=Sresr_ykou_c2V1sKoNxqrqCQovKF5yDiQJ8r3E9lak,50995
 code_puppy/tools/common.py,sha256=lVtF94cn6jtC5YKfitV7L3rk37Ts2gMoHLQrqDFD2E4,46411
@@ -193,13 +193,12 @@ code_puppy/tools/file_modifications.py,sha256=vz9n7R0AGDSdLUArZr_55yJLkyI30M8zre
 code_puppy/tools/file_operations.py,sha256=CqhpuBnOFOcQCIYXOujskxq2VMLWYJhibYrH0YcPSfA,35692
 code_puppy/tools/subagent_context.py,sha256=zsiKV3B3DxZ_Y5IHHhtE-SMFDg_jMrY7Hi6r5LH--IU,4781
 code_puppy/tools/tools_content.py,sha256=bsBqW-ppd1XNAS_g50B3UHDQBWEALC1UneH6-afz1zo,2365
-code_puppy/tools/browser/__init__.py,sha256=HqP5_AKL9IuaXeGLhL_Y799DBU28QZBd2x5ISKJlprc,1097
+code_puppy/tools/browser/__init__.py,sha256=SPiEQwsDj5KoxDwX_viNUKFsn4tczxY-Jq2C64EzSNI,927
 code_puppy/tools/browser/browser_control.py,sha256=YntpjfWTIv0TDlAO5BqTV_hDbUBw-8wmMn29K3TDQo0,8430
 code_puppy/tools/browser/browser_interactions.py,sha256=ZyJmA2-ZtIATF76uGMt08cfVaYiqg7W2-cHfAzNI0F8,16775
 code_puppy/tools/browser/browser_locators.py,sha256=sxXNm-K087poeSp7Um5Gc1sZxb7HlSZOu0F0r2b0ty8,19177
 code_puppy/tools/browser/browser_navigation.py,sha256=RJdG14UXtA6wz4PNLw2Tqeu4oUDQilOyNbyTjgIFCrY,7416
 code_puppy/tools/browser/browser_screenshot.py,sha256=AJe9JbZv8vC93AFWzsAUlrg1YNshv4SWNde-O-_mfQU,6282
-code_puppy/tools/browser/browser_screenshot_vqa.py,sha256=DBdQuV7eIaJX2Qy_liwitfakIzrcVziB-zAGIngM5GE,7349
 code_puppy/tools/browser/browser_scripts.py,sha256=CYWdQMtjKTNvJNSCkB2vGo-MOzmT_gw2oFMGtkfuzuA,14779
 code_puppy/tools/browser/browser_workflows.py,sha256=nitW42vCf0ieTX1gLabozTugNQ8phtoFzZbiAhw1V90,6491
 code_puppy/tools/browser/camoufox_manager.py,sha256=WIr98SrGeC5jd6jX5tjhFR6A3janqV4tq9Mbznnlh44,13920
@@ -207,11 +206,10 @@ code_puppy/tools/browser/chromium_terminal_manager.py,sha256=w1thQ_ACb6oV45L93TS
 code_puppy/tools/browser/terminal_command_tools.py,sha256=9byOZku-dwvTtCl532xt7Lumed_jTn0sLvUe_X75XCQ,19068
 code_puppy/tools/browser/terminal_screenshot_tools.py,sha256=J_21YO_495NvYgNFu9KQP6VYg2K_f8CtSdZuF94Yhnw,18448
 code_puppy/tools/browser/terminal_tools.py,sha256=F5LjVH3udSCFHmqC3O1UJLoLozZFZsEdX42jOmkqkW0,17853
-code_puppy/tools/browser/vqa_agent.py,sha256=0IbS1X3l8ADZI9pGcJbKFoN0-ZuTJa8QvHZ_hGKBKRM,6339
-code_puppy-0.0.359.data/data/code_puppy/models.json,sha256=FMQdE_yvP_8y0xxt3K918UkFL9cZMYAqW1SfXcQkU_k,3105
-code_puppy-0.0.359.data/data/code_puppy/models_dev_api.json,sha256=wHjkj-IM_fx1oHki6-GqtOoCrRMR0ScK0f-Iz0UEcy8,548187
-code_puppy-0.0.359.dist-info/METADATA,sha256=sON8OiWf6tHAK4zV99qE3T1ouWDQVMOCm81jVNjcUCI,27614
-code_puppy-0.0.359.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-code_puppy-0.0.359.dist-info/entry_points.txt,sha256=Tp4eQC99WY3HOKd3sdvb22vZODRq0XkZVNpXOag_KdI,91
-code_puppy-0.0.359.dist-info/licenses/LICENSE,sha256=31u8x0SPgdOq3izJX41kgFazWsM43zPEF9eskzqbJMY,1075
-code_puppy-0.0.359.dist-info/RECORD,,
+code_puppy-0.0.360.data/data/code_puppy/models.json,sha256=FMQdE_yvP_8y0xxt3K918UkFL9cZMYAqW1SfXcQkU_k,3105
+code_puppy-0.0.360.data/data/code_puppy/models_dev_api.json,sha256=wHjkj-IM_fx1oHki6-GqtOoCrRMR0ScK0f-Iz0UEcy8,548187
+code_puppy-0.0.360.dist-info/METADATA,sha256=FZ7fXsTCXSepDHZqKoSNnjd_MyyMDx-Ntb2nnz3Jizg,27614
+code_puppy-0.0.360.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+code_puppy-0.0.360.dist-info/entry_points.txt,sha256=Tp4eQC99WY3HOKd3sdvb22vZODRq0XkZVNpXOag_KdI,91
+code_puppy-0.0.360.dist-info/licenses/LICENSE,sha256=31u8x0SPgdOq3izJX41kgFazWsM43zPEF9eskzqbJMY,1075
+code_puppy-0.0.360.dist-info/RECORD,,

code_puppy/tools/browser/browser_screenshot_vqa.py DELETED Viewed

@@ -1,195 +0,0 @@
-"""VQA-based Screenshot tool for browser automation (qa-kitten).
-This module provides screenshot analysis using a dedicated VQA agent.
-Unlike browser_screenshot.py which returns raw base64 bytes for multimodal
-models to see directly, this version offloads the visual analysis to a
-separate VQA agent, helping manage context in the calling agent.
-Use this for qa-kitten where context management is important.
-Use browser_screenshot.py for terminal-qa where direct image viewing is needed.
-"""
-from typing import Any, Dict, Optional
-from pydantic_ai import RunContext
-from rich.console import Console
-from code_puppy.messaging import emit_error, emit_info, emit_success
-from code_puppy.tools.common import generate_group_id
-from .browser_screenshot import _capture_screenshot
-from .camoufox_manager import get_session_browser_manager
-from .vqa_agent import run_vqa_analysis_stream
-async def take_screenshot_and_analyze(
-    question: str,
-    full_page: bool = False,
-    element_selector: Optional[str] = None,
-    save_screenshot: bool = True,
-) -> Dict[str, Any]:
-    """Take a screenshot and analyze it using the VQA agent.
-    This function captures a screenshot and passes it to a dedicated
-    VQA (Visual Question Answering) agent for analysis. The VQA agent
-    runs separately, keeping the image analysis out of the calling
-    agent's context window.
-    Args:
-        question: The question to ask about the screenshot.
-            Examples:
-            - "What buttons are visible on this page?"
-            - "Is there an error message displayed?"
-            - "What is the main heading text?"
-            - "Describe the layout of this form."
-        full_page: Whether to capture full page or just viewport.
-            Defaults to False (viewport only).
-        element_selector: Optional CSS selector to screenshot a specific
-            element instead of the whole page.
-        save_screenshot: Whether to save the screenshot to disk.
-    Returns:
-        Dict containing:
-            - success (bool): True if analysis succeeded.
-            - answer (str): The VQA agent's streamed answer to your question.
-            - screenshot_info (dict): Path, timestamp, and other metadata.
-            - error (str): Error message if unsuccessful.
-    """
-    target = element_selector or ("full_page" if full_page else "viewport")
-    group_id = generate_group_id(
-        "browser_screenshot_analyze", f"{question[:50]}_{target}"
-    )
-    emit_info(
-        f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
-        message_group=group_id,
-    )
-    try:
-        # Get the browser page
-        browser_manager = get_session_browser_manager()
-        page = await browser_manager.get_current_page()
-        if not page:
-            error_msg = "No active browser page. Navigate to a webpage first."
-            emit_error(error_msg, message_group=group_id)
-            return {"success": False, "error": error_msg, "question": question}
-        # Capture the screenshot
-        screenshot_result = await _capture_screenshot(
-            page,
-            full_page=full_page,
-            element_selector=element_selector,
-            save_screenshot=save_screenshot,
-            group_id=group_id,
-        )
-        if not screenshot_result["success"]:
-            error_msg = screenshot_result.get("error", "Screenshot failed")
-            emit_error(
-                f"Screenshot capture failed: {error_msg}", message_group=group_id
-            )
-            return {"success": False, "error": error_msg, "question": question}
-        screenshot_bytes = screenshot_result.get("screenshot_bytes")
-        if not screenshot_bytes:
-            emit_error(
-                "Screenshot captured but pixel data missing; cannot run visual analysis.",
-                message_group=group_id,
-            )
-            return {
-                "success": False,
-                "error": "Screenshot captured but no image bytes available for analysis.",
-                "question": question,
-            }
-        # Run VQA analysis with streaming output
-        try:
-            console = Console()
-            console.print()  # Newline before streaming starts
-            console.print("[bold cyan]🔍 VQA Analysis:[/bold cyan]")
-            vqa_answer = await run_vqa_analysis_stream(
-                question,
-                screenshot_bytes,
-            )
-        except Exception as exc:
-            emit_error(
-                f"Visual question answering failed: {exc}",
-                message_group=group_id,
-            )
-            return {
-                "success": False,
-                "error": f"Visual analysis failed: {exc}",
-                "question": question,
-                "screenshot_info": {
-                    "path": screenshot_result.get("screenshot_path"),
-                    "timestamp": screenshot_result.get("timestamp"),
-                    "full_page": full_page,
-                    "element_selector": element_selector,
-                },
-            }
-        emit_success(
-            "Visual analysis complete",
-            message_group=group_id,
-        )
-        return {
-            "success": True,
-            "question": question,
-            "answer": vqa_answer,
-            "screenshot_info": {
-                "path": screenshot_result.get("screenshot_path"),
-                "size": len(screenshot_bytes),
-                "timestamp": screenshot_result.get("timestamp"),
-                "full_page": full_page,
-                "element_selector": element_selector,
-            },
-        }
-    except Exception as e:
-        error_msg = f"Screenshot analysis failed: {str(e)}"
-        emit_error(error_msg, message_group=group_id)
-        return {"success": False, "error": error_msg, "question": question}
-def register_take_screenshot_and_analyze_vqa(agent):
-    """Register the VQA-based screenshot tool.
-    This tool takes a screenshot and analyzes it using a separate VQA agent.
-    Use this for agents where context management is important (like qa-kitten).
-    """
-    @agent.tool
-    async def browser_screenshot_vqa(
-        context: RunContext,
-        question: str,
-        full_page: bool = False,
-        element_selector: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        """
-        Take a screenshot and analyze it with VQA.
-        Captures a screenshot of the browser and uses a visual AI to
-        answer your question about what's visible on the page.
-        Args:
-            question: What you want to know about the screenshot.
-                Examples:
-                - "What buttons are visible?"
-                - "Is there an error message?"
-                - "What is the page title?"
-                - "Is the form filled out correctly?"
-            full_page: Capture full page (True) or just viewport (False).
-            element_selector: Optional CSS selector to screenshot specific element.
-        Returns:
-            Dict with:
-            - answer: The streamed answer to your question
-            - screenshot_info: Where the screenshot was saved, etc.
-        """
-        return await take_screenshot_and_analyze(
-            question=question,
-            full_page=full_page,
-            element_selector=element_selector,
-        )

code_puppy/tools/browser/vqa_agent.py DELETED Viewed

@@ -1,194 +0,0 @@
-"""Utilities for running visual question-answering via pydantic-ai."""
-from __future__ import annotations
-from collections.abc import AsyncIterable
-from typing import Any
-from pydantic import BaseModel, Field
-from pydantic_ai import Agent, BinaryContent, PartDeltaEvent, PartStartEvent, RunContext
-from pydantic_ai.messages import TextPart, TextPartDelta
-from code_puppy.config import get_use_dbos, get_vqa_model_name
-class VisualAnalysisResult(BaseModel):
-    """Structured response from the VQA agent."""
-    answer: str
-    confidence: float = Field(ge=0.0, le=1.0)
-    observations: str
-DEFAULT_VQA_INSTRUCTIONS = (
-    "You are a visual analysis specialist. Answer the user's question about the provided image. "
-    "Always respond using the structured schema: answer, confidence (0-1 float), observations. "
-    "Confidence reflects how certain you are about the answer. Observations should include useful, concise context."
-)
-async def run_vqa_analysis(
-    question: str,
-    image_bytes: bytes,
-    media_type: str = "image/png",
-) -> str:
-    """Execute the VQA agent asynchronously against screenshot bytes.
-    Follows the same pattern as agent_tools.py for prompt preparation
-    and model configuration.
-    Args:
-        question: The question to ask about the image.
-        image_bytes: The raw image bytes.
-        media_type: The MIME type of the image (default: "image/png").
-        system_prompt: Optional custom system prompt. If None, uses default VQA instructions.
-    Returns:
-        str: The answer from the VQA analysis.
-    """
-    from code_puppy import callbacks
-    from code_puppy.model_factory import ModelFactory
-    from code_puppy.model_utils import prepare_prompt_for_model
-    # Get model configuration
-    model_name = get_vqa_model_name()
-    models_config = ModelFactory.load_config()
-    model = ModelFactory.get_model(model_name, models_config)
-    # Build instructions: custom system_prompt or default VQA instructions
-    instructions = DEFAULT_VQA_INSTRUCTIONS
-    # Apply prompt additions (like file permission handling) - same as agent_tools.py
-    prompt_additions = callbacks.on_load_prompt()
-    if prompt_additions:
-        instructions += "\n" + "\n".join(prompt_additions)
-    # Handle claude-code models: swap instructions, prepend system prompt to user question
-    # Following the exact pattern from agent_tools.py
-    prepared = prepare_prompt_for_model(
-        model_name, instructions, question, prepend_system_to_user=True
-    )
-    instructions = prepared.instructions
-    question = prepared.user_prompt
-    # Create the VQA agent with string output
-    vqa_agent = Agent(
-        model=model,
-        instructions=instructions,
-    )
-    # Wrap with DBOS if enabled
-    if get_use_dbos():
-        from pydantic_ai.durable_exec.dbos import DBOSAgent
-        vqa_agent = DBOSAgent(vqa_agent, name="vqa-agent")
-    # Run the agent with the image
-    result = await vqa_agent.run(
-        [
-            question,
-            BinaryContent(data=image_bytes, media_type=media_type),
-        ]
-    )
-    return result.output
-def _create_vqa_stream_handler(
-    accumulator: list[str],
-):
-    """Create an event stream handler that accumulates text.
-    Args:
-        accumulator: List to accumulate text chunks into (pass empty list).
-    Returns:
-        Async event stream handler function.
-    """
-    async def vqa_event_stream_handler(
-        ctx: RunContext,
-        events: AsyncIterable[Any],
-    ) -> None:
-        """Handle streaming events - print text as it arrives."""
-        async for event in events:
-            # Handle text part start - might have initial content
-            if isinstance(event, PartStartEvent):
-                if isinstance(event.part, TextPart) and event.part.content:
-                    accumulator.append(event.part.content)
-            # Handle text deltas - the streaming bits
-            elif isinstance(event, PartDeltaEvent):
-                if isinstance(event.delta, TextPartDelta) and event.delta.content_delta:
-                    accumulator.append(event.delta.content_delta)
-    return vqa_event_stream_handler
-async def run_vqa_analysis_stream(
-    question: str,
-    image_bytes: bytes,
-    media_type: str = "image/png",
-) -> str:
-    """Execute the VQA agent with streaming output.
-    Streams text to console as it arrives and accumulates the full response.
-    Args:
-        question: The question to ask about the image.
-        image_bytes: The raw image bytes.
-        media_type: The MIME type of the image (default: "image/png").
-    Returns:
-        str: The accumulated answer from the VQA analysis.
-    """
-    from code_puppy import callbacks
-    from code_puppy.model_factory import ModelFactory
-    from code_puppy.model_utils import prepare_prompt_for_model
-    # Get model configuration
-    model_name = get_vqa_model_name()
-    models_config = ModelFactory.load_config()
-    model = ModelFactory.get_model(model_name, models_config)
-    # Build instructions
-    instructions = DEFAULT_VQA_INSTRUCTIONS
-    # Apply prompt additions (like file permission handling)
-    prompt_additions = callbacks.on_load_prompt()
-    if prompt_additions:
-        instructions += "\n" + "\n".join(prompt_additions)
-    # Handle claude-code models: swap instructions, prepend system prompt to user question
-    prepared = prepare_prompt_for_model(
-        model_name, instructions, question, prepend_system_to_user=True
-    )
-    instructions = prepared.instructions
-    question = prepared.user_prompt
-    # Create the VQA agent
-    vqa_agent = Agent(
-        model=model,
-        instructions=instructions,
-    )
-    # Wrap with DBOS if enabled
-    if get_use_dbos():
-        from pydantic_ai.durable_exec.dbos import DBOSAgent
-        vqa_agent = DBOSAgent(vqa_agent, name="vqa-agent-stream")
-    # Accumulator for streamed text (use list to allow mutation in handler)
-    accumulated_chunks: list[str] = []
-    # Create the stream handler
-    stream_handler = _create_vqa_stream_handler(accumulated_chunks)
-    # Run the agent with event_stream_handler
-    result = await vqa_agent.run(
-        [
-            question,
-            BinaryContent(data=image_bytes, media_type=media_type),
-        ],
-        event_stream_handler=stream_handler,
-    )
-    return result.output

{code_puppy-0.0.359.data → code_puppy-0.0.360.data}/data/code_puppy/models.json RENAMED Viewed

File without changes

{code_puppy-0.0.359.data → code_puppy-0.0.360.data}/data/code_puppy/models_dev_api.json RENAMED Viewed

File without changes

{code_puppy-0.0.359.dist-info → code_puppy-0.0.360.dist-info}/WHEEL RENAMED Viewed

File without changes

{code_puppy-0.0.359.dist-info → code_puppy-0.0.360.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{code_puppy-0.0.359.dist-info → code_puppy-0.0.360.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

code-puppy 0.0.359__py3-none-any.whl → 0.0.360__py3-none-any.whl

code-puppy 0.0.359__py3-none-any.whl → 0.0.360__py3-none-any.whl