PyPI - hud-python - Versions diffs - 0.4.12__tar.gz → 0.4.14__tar.gz - Mend

hud-python 0.4.12__tar.gz → 0.4.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (172) hide show

{hud_python-0.4.12 → hud_python-0.4.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.12
+Version: 0.4.14
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,10 +35,9 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: <3.14,>=3.11
-Requires-Dist: fastmcp>=2.11.2
 Requires-Dist: httpx<1,>=0.23.0
-Requires-Dist: hud-mcp-python-sdk>=0.1.0
-Requires-Dist: mcp>=1.13.1
+Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
+Requires-Dist: hud-mcp-python-sdk>=3.13.2
 Requires-Dist: opentelemetry-api>=1.34.1
 Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
 Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
@@ -56,6 +55,7 @@ Provides-Extra: agent
 Requires-Dist: anthropic; extra == 'agent'
 Requires-Dist: datasets>=2.14.0; extra == 'agent'
 Requires-Dist: dotenv>=0.9.9; extra == 'agent'
+Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
 Requires-Dist: ipykernel; extra == 'agent'
 Requires-Dist: ipython<9; extra == 'agent'
 Requires-Dist: jupyter-client; extra == 'agent'
@@ -63,13 +63,13 @@ Requires-Dist: jupyter-core; extra == 'agent'
 Requires-Dist: langchain; extra == 'agent'
 Requires-Dist: langchain-anthropic; extra == 'agent'
 Requires-Dist: langchain-openai; extra == 'agent'
-Requires-Dist: mcp-use; extra == 'agent'
 Requires-Dist: numpy>=1.24.0; extra == 'agent'
 Requires-Dist: openai; extra == 'agent'
 Provides-Extra: agents
 Requires-Dist: anthropic; extra == 'agents'
 Requires-Dist: datasets>=2.14.0; extra == 'agents'
 Requires-Dist: dotenv>=0.9.9; extra == 'agents'
+Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
 Requires-Dist: ipykernel; extra == 'agents'
 Requires-Dist: ipython<9; extra == 'agents'
 Requires-Dist: jupyter-client; extra == 'agents'
@@ -77,7 +77,6 @@ Requires-Dist: jupyter-core; extra == 'agents'
 Requires-Dist: langchain; extra == 'agents'
 Requires-Dist: langchain-anthropic; extra == 'agents'
 Requires-Dist: langchain-openai; extra == 'agents'
-Requires-Dist: mcp-use; extra == 'agents'
 Requires-Dist: numpy>=1.24.0; extra == 'agents'
 Requires-Dist: openai; extra == 'agents'
 Provides-Extra: dev
@@ -85,6 +84,7 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
 Requires-Dist: anthropic; extra == 'dev'
 Requires-Dist: datasets>=2.14.0; extra == 'dev'
 Requires-Dist: dotenv>=0.9.9; extra == 'dev'
+Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
 Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
 Requires-Dist: ipykernel; extra == 'dev'
 Requires-Dist: ipython<9; extra == 'dev'
@@ -93,7 +93,6 @@ Requires-Dist: jupyter-core; extra == 'dev'
 Requires-Dist: langchain; extra == 'dev'
 Requires-Dist: langchain-anthropic; extra == 'dev'
 Requires-Dist: langchain-openai; extra == 'dev'
-Requires-Dist: mcp-use; extra == 'dev'
 Requires-Dist: numpy>=1.24.0; extra == 'dev'
 Requires-Dist: openai; extra == 'dev'
 Requires-Dist: pillow>=11.1.0; extra == 'dev'

{hud_python-0.4.12 → hud_python-0.4.14}/environments/README.md RENAMED Viewed

@@ -351,7 +351,7 @@ from . import basic, advanced  # This registers all @setup.tool() decorated func
 # In setup/basic.py
 from . import setup
-from hud.tools.types import SetupResult
+from mcp.types import TextContent
 @setup.tool()
 async def reset(**kwargs):
@@ -361,14 +361,14 @@ async def reset(**kwargs):
         **kwargs: Additional parameters
     Returns:
-        SetupResult
+        TextContent
     """
     # Access environment from the hub
     env = setup.env
     await env.reset_state()
-    return SetupResult(
-        content="Environment reset to initial state",
-        info={"status": "success"}
+    return TextContent(
+        text="Environment reset to initial state",
+        type="text"
     )
 @setup.tool()
@@ -379,14 +379,14 @@ async def seed_data(num_items: int = 5):
         num_items: Number of items to create
     Returns:
-        SetupResult
+        TextContent
     """
     # Access environment from the hub
     env = setup.env
     items = await env.create_items(num_items)
-    return SetupResult(
-        content=f"Created {len(items)} items",
-        info={"items_created": len(items)}
+    return TextContent(
+        text=f"Created {len(items)} items",
+        type="text"
     )
 # In evaluate/__init__.py
@@ -827,13 +827,13 @@ Before making changes:
 ```python
 # In setup/my_new_setup.py
 from . import setup
-from hud.tools import BaseSetup, SetupResult
+from hud.tools import BaseSetup, TextContent
 @setup("my_new_setup", description="Clear description of what this does")
 class MyNewSetup(BaseSetup):
-    async def __call__(self, context, param1: str, param2: int = 10) -> SetupResult:
+    async def __call__(self, context, param1: str, param2: int = 10) -> TextContent:
         # Implementation
-        return {"status": "success", "details": "..."}
+        return TextContent(...)
 ```
 **Adding New Evaluators**

hud_python-0.4.14/environments/browser/README.md ADDED Viewed

@@ -0,0 +1,213 @@
+# Browser Environment
+A browser automation environment for HUD that provides GUI access and web app interaction capabilities. This environment supports hot-reloading during development while maintaining persistent state.
+## Architecture Overview
+The browser environment uses a two-process architecture:
+1. **Context Server** (`context.py`): Long-running process that maintains persistent state
+2. **MCP Server** (`server.py`): Hot-reloadable process that handles tool requests
+### Key Components
+- **BrowserContext**: Stores persistent state (running apps, ports, playwright instance)
+- **ServiceManager**: Manages X11, VNC, and app processes
+- **BaseHub Tools**: Setup and evaluate tools organized by app (2048, todo)
+- **Multiprocessing Proxy**: Enables state sharing between processes
+## Context Management and Common Pitfalls
+### Understanding the Proxy System
+The browser environment uses Python's `multiprocessing.Manager` to share state between the context server and MCP server. This introduces important constraints:
+#### ❌ Common Pitfall: Unpicklable Objects
+```python
+# BAD: This will fail with "cannot pickle 'coroutine' object"
+@setup.tool("my_tool")
+async def my_tool():
+    env = setup.env
+    result = await env.call_app_api("app", "/api/endpoint")  # Returns coroutine
+    # The coroutine can't be serialized through the proxy!
+```
+#### ✅ Solution: Direct HTTP Calls
+```python
+# GOOD: Make HTTP calls directly
+@setup.tool("my_tool")
+async def my_tool():
+    import httpx
+    # Get the backend port from persistent context
+    persistent_ctx = setup.env
+    backend_port = persistent_ctx.get_app_backend_port("app")
+    # Make API call directly
+    url = f"http://localhost:{backend_port}/api/endpoint"
+    async with httpx.AsyncClient() as client:
+        response = await client.get(url)
+        response.raise_for_status()
+        result = response.json()
+```
+### State Synchronization Issues
+#### ❌ Common Pitfall: Direct List/Dict Manipulation
+```python
+# BAD: Regular Python lists don't sync through proxy
+class ServiceManager:
+    def __init__(self):
+        self._launched_apps = []  # Won't sync!
+```
+#### ✅ Solution: Store State in Persistent Context
+```python
+# GOOD: Use the persistent context for shared state
+class BrowserContext:
+    def __init__(self):
+        self._running_apps: List[str] = []
+        self._app_ports: Dict[str, Dict[str, int]] = {}
+    def add_running_app(self, app_name: str) -> None:
+        """Add app to running list."""
+        if app_name not in self._running_apps:
+            self._running_apps.append(app_name)
+```
+### Accessing Shared Resources
+#### ❌ Common Pitfall: Direct Attribute Access
+```python
+# BAD: Direct attribute access on proxy objects
+playwright_tool = env.playwright  # May not work with proxy
+```
+#### ✅ Solution: Use Getter Methods
+```python
+# GOOD: Use proxy-friendly getter methods
+playwright_tool = persistent_ctx.get_playwright_tool()
+```
+## Best Practices
+### 1. Tool Implementation Pattern
+All setup and evaluate tools should follow this pattern:
+```python
+@setup.tool("tool_name")
+async def tool_name(param1: type, param2: type):
+    """Tool description."""
+    try:
+        # Get persistent context
+        persistent_ctx = setup.env  # or evaluate.env
+        # Get app ports
+        backend_port = persistent_ctx.get_app_backend_port("app_name")
+        # Make HTTP request
+        url = f"http://localhost:{backend_port}/api/endpoint"
+        async with httpx.AsyncClient() as client:
+            response = await client.method(url, json=data)
+            response.raise_for_status()
+            result = response.json()
+        # Return result
+        return TextContent(
+            text=f"Success message",
+            type="text"
+        )
+    except Exception as e:
+        logger.error(f"tool_name failed: {e}")
+        return TextContent(
+            text=f"Failed: {str(e)}",
+            type="text"
+        )
+```
+### 2. App Launch Pattern
+When launching apps, ensure ports are stored in the persistent context:
+```python
+# In launch_app tool
+app_info = await service_manager.launch_app(app_name)
+# Store ports in persistent context for later access
+try:
+    backend_port = service_manager.get_app_port(app_name)
+    frontend_port = service_manager.get_app_frontend_port(app_name)
+    persistent_ctx.set_app_ports(app_name, frontend_port, backend_port)
+except Exception as e:
+    logger.error(f"Failed to store ports: {e}")
+# Track app in persistent context
+persistent_ctx.add_running_app(app_name)
+```
+### 3. Import Organization
+Keep imports at module level:
+```python
+# At top of file
+import logging
+import httpx
+from mcp.types import TextContent
+from . import setup
+# Not inside functions
+```
+## Troubleshooting
+### "Cannot pickle 'coroutine' object"
+**Cause**: Trying to return an async function result through the proxy.
+**Fix**: Don't use async methods on proxied objects. Make direct HTTP calls instead.
+### "App not launched" errors
+**Cause**: State synchronization issue between ServiceManager and persistent context.
+**Fix**: Ensure `launch_app` stores app info in the persistent context, and setup/evaluate tools check the persistent context's app list.
+### "Object has no attribute" on proxy objects
+**Cause**: Direct attribute access on multiprocessing proxy objects.
+**Fix**: Use getter/setter methods instead of direct attribute access.
+## Development Workflow
+1. **Start the environment**: `hud dev`
+2. **Make changes**: Edit tools in `src/hud_controller/`
+3. **Test immediately**: The MCP server hot-reloads automatically
+4. **Check logs**: Look for serialization or proxy errors
+## Adding New Apps
+1. Create app directory in `apps/`
+2. Add setup tools in `src/hud_controller/setup/app_name.py`
+3. Add evaluate tools in `src/hud_controller/evaluate/app_name.py`
+4. Follow the HTTP pattern - no `call_app_api` usage
+5. Store app ports in persistent context when launching
+## Key Files
+- `context.py`: Persistent state management
+- `server.py`: MCP server and tool definitions
+- `services.py`: Process management for X11, VNC, apps
+- `setup/`: Setup tools organized by app
+- `evaluate/`: Evaluation tools organized by app
+Remember: When in doubt, make direct HTTP calls and store state in the persistent context!

{hud_python-0.4.12 → hud_python-0.4.14}/environments/remote_browser/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "hud-remote-browser"
 version = "0.1.0"
 description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
 requires-python = ">=3.11,<3.13"
-dependencies = [ "hud-python==0.4.12", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
+dependencies = [ "hud-python>=0.4.12", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
 [build-system]
 requires = [ "hatchling",]

{hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/__init__.py RENAMED Viewed

@@ -348,6 +348,11 @@ def dev(
     ),
     port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
     no_reload: bool = typer.Option(False, "--no-reload", help="Disable hot-reload"),
+    full_reload: bool = typer.Option(
+        False,
+        "--full-reload",
+        help="Restart entire container on file changes (instead of just server process)",
+    ),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Show server logs"),
     inspector: bool = typer.Option(
         False, "--inspector", help="Launch MCP Inspector (HTTP mode only)"
@@ -375,12 +380,13 @@ def dev(
         hud dev . --inspector        # Launch MCP Inspector (HTTP mode only)
         hud dev . --interactive      # Launch interactive testing mode (HTTP mode only)
         hud dev . --no-logs          # Disable Docker log streaming
+        hud dev . --full-reload      # Restart entire container on file changes (instead of just server)
         # With Docker arguments (after all options):
         hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
         hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
         hud dev . --build -e DEBUG=true --memory 2g
-    """
+    """  # noqa: E501
     # Parse directory and Docker arguments
     if params:
         directory = params[0]
@@ -397,6 +403,7 @@ def dev(
         transport,
         port,
         no_reload,
+        full_reload,
         verbose,
         inspector,
         no_logs,

{hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/dev.py RENAMED Viewed

@@ -35,6 +35,7 @@ def create_proxy_server(
     directory: str | Path,
     image_name: str,
     no_reload: bool = False,
+    full_reload: bool = False,
     verbose: bool = False,
     docker_args: list[str] | None = None,
     interactive: bool = False,
@@ -48,8 +49,12 @@ def create_proxy_server(
         design.warning(f"Could not extract CMD from {image_name}, using default")
         original_cmd = ["python", "-m", "hud_controller.server"]
-    # Generate container name from image
-    container_name = f"{image_name.replace(':', '-').replace('/', '-')}"
+    # Generate unique container name from image to avoid conflicts between multiple instances
+    import os
+    pid = str(os.getpid())[-6:]  # Last 6 digits of process ID for uniqueness
+    base_name = image_name.replace(":", "-").replace("/", "-")
+    container_name = f"{base_name}-{pid}"
     # Build the docker run command
     docker_cmd = [
@@ -73,14 +78,20 @@ def create_proxy_server(
     if interactive:
         no_reload = True
-    if not no_reload:
-        # Inject our supervisor into the CMD
+    # Validate reload options
+    if no_reload and full_reload:
+        design.warning("Cannot use --full-reload with --no-reload, ignoring --full-reload")
+        full_reload = False
+    if not no_reload and not full_reload:
+        # Standard hot-reload: inject supervisor for server restart within container
         modified_cmd = inject_supervisor(original_cmd)
         docker_cmd.extend(["--entrypoint", modified_cmd[0]])
         docker_cmd.append(image_name)
         docker_cmd.extend(modified_cmd[1:])
     else:
-        # No reload - use original CMD
+        # No reload or full reload: use original CMD without supervisor
+        # Note: Full reload logic (container restart) would be implemented here in the future
         docker_cmd.append(image_name)
     # Create configuration following MCPConfig schema
@@ -96,9 +107,14 @@ def create_proxy_server(
     # Debug output - only if verbose
     if verbose:
-        if not no_reload:
+        if not no_reload and not full_reload:
+            design.info("Mode: Hot-reload (server restart within container)")
             design.info("Watching: /app/src for changes")
+        elif full_reload:
+            design.info("Mode: Full reload (container restart on file changes)")
+            design.info("Note: Full container restart not yet implemented, using no-reload mode")
         else:
+            design.info("Mode: No reload")
             design.info("Container will run without hot-reload")
         design.command_example(f"docker logs -f {container_name}", "View container logs")
@@ -127,6 +143,7 @@ async def start_mcp_proxy(
     transport: str,
     port: int,
     no_reload: bool = False,
+    full_reload: bool = False,
     verbose: bool = False,
     inspector: bool = False,
     no_logs: bool = False,
@@ -212,8 +229,12 @@ async def start_mcp_proxy(
         design.error(f"Source directory not found: {src_path}")
         raise click.Abort
-    # Extract container name from the proxy configuration
-    container_name = f"{image_name.replace(':', '-').replace('/', '-')}"
+    # Extract container name from the proxy configuration (must match create_proxy_server naming)
+    import os
+    pid = str(os.getpid())[-6:]  # Last 6 digits of process ID for uniqueness
+    base_name = image_name.replace(":", "-").replace("/", "-")
+    container_name = f"{base_name}-{pid}"
     # Remove any existing container with the same name (silently)
     # Note: The proxy creates containers on-demand when clients connect
@@ -347,6 +368,7 @@ async def start_mcp_proxy(
         # Always show waiting message
         log_design.info("")  # Empty line for spacing
         log_design.progress_message("⏳ Waiting for first client connection to start container...")
+        log_design.info(f"📋 Looking for container: {container_name}")  # noqa: G004
         # Keep trying to stream logs - container is created on demand
         has_shown_started = False
@@ -397,7 +419,8 @@ async def start_mcp_proxy(
                         # Show all logs with gold formatting like hud debug
                         # Format all logs in gold/dim style like hud debug's stderr
-                        log_design.console.print(
+                        # Use stdout console to avoid stderr redirection when not verbose
+                        log_design._stdout_console.print(
                             f"[rgb(192,150,12)]■[/rgb(192,150,12)] {decoded_line}", highlight=False
                         )
@@ -408,16 +431,19 @@ async def start_mcp_proxy(
                 await asyncio.sleep(1)
                 continue  # Loop back to check if container exists
-            except Exception:
-                # Some unexpected error
+            except Exception as e:
+                # Some unexpected error - show it so we can debug
+                log_design.warning(f"Failed to stream Docker logs: {e}")  # noqa: G004
                 if verbose:
-                    log_design.warning("Failed to stream logs")
+                    import traceback
+                    log_design.warning(f"Traceback: {traceback.format_exc()}")  # noqa: G004
                 await asyncio.sleep(1)
     # CRITICAL: Create proxy AFTER all logging setup to prevent it from resetting logging config
     # This is important because FastMCP might initialize loggers during creation
     proxy = create_proxy_server(
-        directory, image_name, no_reload, verbose, docker_args or [], interactive
+        directory, image_name, no_reload, full_reload, verbose, docker_args or [], interactive
     )
     # One more attempt to suppress the FastMCP server log
@@ -548,6 +574,7 @@ def run_mcp_dev_server(
     transport: str = "http",
     port: int = 8765,
     no_reload: bool = False,
+    full_reload: bool = False,
     verbose: bool = False,
     inspector: bool = False,
     no_logs: bool = False,
@@ -706,6 +733,7 @@ def run_mcp_dev_server(
                 transport,
                 port,
                 no_reload,
+                full_reload,
                 verbose,
                 inspector,
                 no_logs,

{hud_python-0.4.12 → hud_python-0.4.14}/hud/cli/eval.py RENAMED Viewed

@@ -26,15 +26,6 @@ def build_agent(
     """Create and return the requested agent type."""
     # Import agents lazily to avoid dependency issues
-    try:
-        from hud.agents.misc.response_agent import ResponseAgent
-    except ImportError as e:
-        design.error(
-            "Agent dependencies are not installed. "
-            "Please install with: pip install 'hud-python[agent]'"
-        )
-        raise typer.Exit(1) from e
     if agent_type == "openai":
         try:
             from hud.agents import OperatorAgent
@@ -45,12 +36,12 @@ def build_agent(
             )
             raise typer.Exit(1) from e
-        allowed_tools = allowed_tools or ["openai_computer"]
-        return OperatorAgent(
-            allowed_tools=allowed_tools,
-            response_agent=ResponseAgent(),
-        )
+        if allowed_tools:
+            return OperatorAgent(
+                allowed_tools=allowed_tools,
+            )
+        else:
+            return OperatorAgent()
     # Fallback Claude agent (Anthropic)
     try:
@@ -63,13 +54,16 @@ def build_agent(
         raise typer.Exit(1) from e
     model = model or "claude-sonnet-4-20250514"
-    allowed_tools = allowed_tools or ["anthropic_computer"]
-    return ClaudeAgent(
-        model=model,
-        allowed_tools=allowed_tools,
-        response_agent=ResponseAgent(),
-    )
+    if allowed_tools:
+        return ClaudeAgent(
+            model=model,
+            allowed_tools=allowed_tools,
+        )
+    else:
+        return ClaudeAgent(
+            model=model,
+        )
 async def run_single_task(
@@ -100,8 +94,8 @@ async def run_single_task(
         with open(path) as f:  # noqa: ASYNC230
             json_data = json.load(f)
-        # Check if JSON contains a list of tasks
-        if isinstance(json_data, list):
+        # Check if JSON contains multiple tasks (list with more than 1 task)
+        if isinstance(json_data, list) and len(json_data) > 1:
             design.info(f"Found {len(json_data)} tasks in JSON file, running as dataset…")
             # Build agent class and config for run_dataset
@@ -118,8 +112,10 @@ async def run_single_task(
                     raise typer.Exit(1) from e
                 agent_config: dict[str, Any] = {
-                    "allowed_tools": allowed_tools or ["openai_computer"],
                 }
+                if allowed_tools:
+                    agent_config["allowed_tools"] = allowed_tools
             else:
                 try:
                     from hud.agents import ClaudeAgent
@@ -134,8 +130,9 @@ async def run_single_task(
                 agent_config = {
                     "model": model or "claude-sonnet-4-20250514",
-                    "allowed_tools": allowed_tools or ["anthropic_computer"],
                 }
+                if allowed_tools:
+                    agent_config["allowed_tools"] = allowed_tools
             # Run as dataset with single-task concurrency to maintain debug behavior
             results = await run_dataset(
@@ -146,7 +143,6 @@ async def run_single_task(
                 max_concurrent=1,  # Run sequentially for debug mode
                 metadata={"source": str(path)},
                 max_steps=max_steps,
-                auto_respond=True,
             )
             # Display summary
@@ -154,8 +150,15 @@ async def run_single_task(
             design.success(f"Completed {len(results)} tasks: {successful} successful")
             return
-        # Single task JSON
-        task = Task(**json_data)
+        # Single task JSON (either direct object or list with 1 task)
+        if isinstance(json_data, list) and len(json_data) == 1:
+            design.info("Found 1 task in JSON file, running as single task…")
+            task = Task(**json_data[0])
+        elif isinstance(json_data, dict):
+            task = Task(**json_data)
+        else:
+            design.error("JSON file must contain a list of tasks when using --full flag")
+            raise typer.Exit(1)
     else:
         # Load from HuggingFace dataset
         try:
@@ -238,8 +241,10 @@ async def run_full_dataset(
             raise typer.Exit(1) from e
         agent_config: dict[str, Any] = {
-            "allowed_tools": allowed_tools or ["openai_computer"],
         }
+        if allowed_tools:
+            agent_config["allowed_tools"] = allowed_tools
     else:
         try:
             from hud.agents import ClaudeAgent
@@ -254,8 +259,9 @@ async def run_full_dataset(
         agent_config = {
             "model": model or "claude-sonnet-4-20250514",
-            "allowed_tools": allowed_tools or ["anthropic_computer"],
         }
+        if allowed_tools:
+            agent_config["allowed_tools"] = allowed_tools
     design.info("🚀 Running evaluation…")
     return await run_dataset(
@@ -266,7 +272,6 @@ async def run_full_dataset(
         max_concurrent=max_concurrent,
         metadata={"dataset": source},
         max_steps=max_steps,
-        auto_respond=True,
     )

hud-python 0.4.12__tar.gz → 0.4.14__tar.gz

Potentially problematic release.

hud-python 0.4.12__tar.gz → 0.4.14__tar.gz