PyPI - hud-python - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show

hud/__init__.py +22 -89
hud/agents/__init__.py +17 -0
hud/agents/art.py +101 -0
hud/agents/base.py +599 -0
hud/{mcp → agents}/claude.py +373 -321
hud/{mcp → agents}/langchain.py +250 -250
hud/agents/misc/__init__.py +7 -0
hud/{agent → agents}/misc/response_agent.py +80 -80
hud/{mcp → agents}/openai.py +352 -334
hud/agents/openai_chat_generic.py +154 -0
hud/{mcp → agents}/tests/__init__.py +1 -1
hud/agents/tests/test_base.py +742 -0
hud/agents/tests/test_claude.py +324 -0
hud/{mcp → agents}/tests/test_client.py +363 -324
hud/{mcp → agents}/tests/test_openai.py +237 -238
hud/cli/__init__.py +617 -0
hud/cli/__main__.py +8 -0
hud/cli/analyze.py +371 -0
hud/cli/analyze_metadata.py +230 -0
hud/cli/build.py +427 -0
hud/cli/clone.py +185 -0
hud/cli/cursor.py +92 -0
hud/cli/debug.py +392 -0
hud/cli/docker_utils.py +83 -0
hud/cli/init.py +281 -0
hud/cli/interactive.py +353 -0
hud/cli/mcp_server.py +756 -0
hud/cli/pull.py +336 -0
hud/cli/push.py +379 -0
hud/cli/remote_runner.py +311 -0
hud/cli/runner.py +160 -0
hud/cli/tests/__init__.py +3 -0
hud/cli/tests/test_analyze.py +284 -0
hud/cli/tests/test_cli_init.py +265 -0
hud/cli/tests/test_cli_main.py +27 -0
hud/cli/tests/test_clone.py +142 -0
hud/cli/tests/test_cursor.py +253 -0
hud/cli/tests/test_debug.py +453 -0
hud/cli/tests/test_mcp_server.py +139 -0
hud/cli/tests/test_utils.py +388 -0
hud/cli/utils.py +263 -0
hud/clients/README.md +143 -0
hud/clients/__init__.py +16 -0
hud/clients/base.py +354 -0
hud/clients/fastmcp.py +202 -0
hud/clients/mcp_use.py +278 -0
hud/clients/tests/__init__.py +1 -0
hud/clients/tests/test_client_integration.py +111 -0
hud/clients/tests/test_fastmcp.py +342 -0
hud/clients/tests/test_protocol.py +188 -0
hud/clients/utils/__init__.py +1 -0
hud/clients/utils/retry_transport.py +160 -0
hud/datasets.py +322 -192
hud/misc/__init__.py +1 -0
hud/{agent → misc}/claude_plays_pokemon.py +292 -283
hud/otel/__init__.py +35 -0
hud/otel/collector.py +142 -0
hud/otel/config.py +164 -0
hud/otel/context.py +536 -0
hud/otel/exporters.py +366 -0
hud/otel/instrumentation.py +97 -0
hud/otel/processors.py +118 -0
hud/otel/tests/__init__.py +1 -0
hud/otel/tests/test_processors.py +197 -0
hud/server/__init__.py +5 -5
hud/server/context.py +114 -0
hud/server/helper/__init__.py +5 -0
hud/server/low_level.py +132 -0
hud/server/server.py +166 -0
hud/server/tests/__init__.py +3 -0
hud/settings.py +73 -79
hud/shared/__init__.py +5 -0
hud/{exceptions.py → shared/exceptions.py} +180 -180
hud/{server → shared}/requests.py +264 -264
hud/shared/tests/test_exceptions.py +157 -0
hud/{server → shared}/tests/test_requests.py +275 -275
hud/telemetry/__init__.py +25 -30
hud/telemetry/instrument.py +379 -0
hud/telemetry/job.py +309 -141
hud/telemetry/replay.py +74 -0
hud/telemetry/trace.py +83 -0
hud/tools/__init__.py +33 -34
hud/tools/base.py +365 -65
hud/tools/bash.py +161 -137
hud/tools/computer/__init__.py +15 -13
hud/tools/computer/anthropic.py +437 -420
hud/tools/computer/hud.py +376 -334
hud/tools/computer/openai.py +295 -292
hud/tools/computer/settings.py +82 -0
hud/tools/edit.py +314 -290
hud/tools/executors/__init__.py +30 -30
hud/tools/executors/base.py +539 -532
hud/tools/executors/pyautogui.py +621 -619
hud/tools/executors/tests/__init__.py +1 -1
hud/tools/executors/tests/test_base_executor.py +338 -338
hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
hud/tools/executors/xdo.py +511 -503
hud/tools/{playwright_tool.py → playwright.py} +412 -379
hud/tools/tests/__init__.py +3 -3
hud/tools/tests/test_base.py +282 -0
hud/tools/tests/test_bash.py +158 -152
hud/tools/tests/test_bash_extended.py +197 -0
hud/tools/tests/test_computer.py +425 -52
hud/tools/tests/test_computer_actions.py +34 -34
hud/tools/tests/test_edit.py +259 -240
hud/tools/tests/test_init.py +27 -27
hud/tools/tests/test_playwright_tool.py +183 -183
hud/tools/tests/test_tools.py +145 -157
hud/tools/tests/test_utils.py +156 -156
hud/tools/types.py +72 -0
hud/tools/utils.py +50 -50
hud/types.py +136 -89
hud/utils/__init__.py +10 -16
hud/utils/async_utils.py +65 -0
hud/utils/design.py +168 -0
hud/utils/mcp.py +55 -0
hud/utils/progress.py +149 -149
hud/utils/telemetry.py +66 -66
hud/utils/tests/test_async_utils.py +173 -0
hud/utils/tests/test_init.py +17 -21
hud/utils/tests/test_progress.py +261 -225
hud/utils/tests/test_telemetry.py +82 -37
hud/utils/tests/test_version.py +8 -8
hud/version.py +7 -7
hud_python-0.4.0.dist-info/METADATA +474 -0
hud_python-0.4.0.dist-info/RECORD +132 -0
hud_python-0.4.0.dist-info/entry_points.txt +3 -0
{hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
hud/adapters/__init__.py +0 -8
hud/adapters/claude/__init__.py +0 -5
hud/adapters/claude/adapter.py +0 -180
hud/adapters/claude/tests/__init__.py +0 -1
hud/adapters/claude/tests/test_adapter.py +0 -519
hud/adapters/common/__init__.py +0 -6
hud/adapters/common/adapter.py +0 -178
hud/adapters/common/tests/test_adapter.py +0 -289
hud/adapters/common/types.py +0 -446
hud/adapters/operator/__init__.py +0 -5
hud/adapters/operator/adapter.py +0 -108
hud/adapters/operator/tests/__init__.py +0 -1
hud/adapters/operator/tests/test_adapter.py +0 -370
hud/agent/__init__.py +0 -19
hud/agent/base.py +0 -126
hud/agent/claude.py +0 -271
hud/agent/langchain.py +0 -215
hud/agent/misc/__init__.py +0 -3
hud/agent/operator.py +0 -268
hud/agent/tests/__init__.py +0 -1
hud/agent/tests/test_base.py +0 -202
hud/env/__init__.py +0 -11
hud/env/client.py +0 -35
hud/env/docker_client.py +0 -349
hud/env/environment.py +0 -446
hud/env/local_docker_client.py +0 -358
hud/env/remote_client.py +0 -212
hud/env/remote_docker_client.py +0 -292
hud/gym.py +0 -130
hud/job.py +0 -773
hud/mcp/__init__.py +0 -17
hud/mcp/base.py +0 -631
hud/mcp/client.py +0 -312
hud/mcp/tests/test_base.py +0 -512
hud/mcp/tests/test_claude.py +0 -294
hud/task.py +0 -149
hud/taskset.py +0 -237
hud/telemetry/_trace.py +0 -347
hud/telemetry/context.py +0 -230
hud/telemetry/exporter.py +0 -575
hud/telemetry/instrumentation/__init__.py +0 -3
hud/telemetry/instrumentation/mcp.py +0 -259
hud/telemetry/instrumentation/registry.py +0 -59
hud/telemetry/mcp_models.py +0 -270
hud/telemetry/tests/__init__.py +0 -1
hud/telemetry/tests/test_context.py +0 -210
hud/telemetry/tests/test_trace.py +0 -312
hud/tools/helper/README.md +0 -56
hud/tools/helper/__init__.py +0 -9
hud/tools/helper/mcp_server.py +0 -78
hud/tools/helper/server_initialization.py +0 -115
hud/tools/helper/utils.py +0 -58
hud/trajectory.py +0 -94
hud/utils/agent.py +0 -37
hud/utils/common.py +0 -256
hud/utils/config.py +0 -120
hud/utils/deprecation.py +0 -115
hud/utils/misc.py +0 -53
hud/utils/tests/test_common.py +0 -277
hud/utils/tests/test_config.py +0 -129
hud_python-0.3.5.dist-info/METADATA +0 -284
hud_python-0.3.5.dist-info/RECORD +0 -120
/hud/{adapters/common → shared}/tests/__init__.py +0 -0
{hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0

hud/cli/debug.py ADDED Viewed

@@ -0,0 +1,392 @@
+"""Debug command implementation for MCP environments."""
+# ruff: noqa: G004
+from __future__ import annotations
+import asyncio
+import json
+import subprocess
+import threading
+import time
+from rich.console import Console
+from hud.clients import MCPClient
+from hud.utils.design import HUDDesign
+from .utils import CaptureLogger, Colors, analyze_error_for_hints
+console = Console()
def _stop_process(proc: "subprocess.Popen[str]") -> None:
    """Terminate *proc* and reap it, escalating to ``kill()`` after a 5 second grace period."""
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


async def debug_mcp_stdio(command: list[str], logger: CaptureLogger, max_phase: int = 5) -> int:
    """
    Debug any stdio-based MCP server step by step.

    The phases run in order and the function returns as soon as one fails or
    ``max_phase`` is reached:

    1. Run the bare executable (``command[0]``) to check that it can be started.
    2. Spawn the full command, send a JSON-RPC ``initialize`` request on stdin
       and wait for the reply with ``id == 1`` on stdout.
    3. Connect through ``MCPClient`` and list tools (and resources).
    4. Call the ``setup`` / ``evaluate`` tools if present and report elapsed time.
    5. Connect three additional clients, then shut them down.

    Args:
        command: Command and arguments to run the MCP server
        logger: CaptureLogger instance for output
        max_phase: Maximum phase to run (1-5, default 5 for all phases)

    Returns:
        Number of phases completed (0-5)
    """
    # Create design instance for initial output (before logger takes over)
    if logger.print_output:
        design = HUDDesign()
        design.header("MCP Server Debugger", icon="🔍")
        design.dim_info("Command:", " ".join(command))
        design.dim_info("Time:", time.strftime("%Y-%m-%d %H:%M:%S"))
        # Explain color coding using Rich formatting
        design.info("\nColor Key:")
        console.print("  [bold]■[/bold] Commands (bold)")
        console.print("  [rgb(192,150,12)]■[/rgb(192,150,12)] STDIO (MCP protocol)")
        console.print("  [dim]■[/dim] STDERR (server logs)")
        console.print("  [green]■[/green] Success messages")
        console.print("  [red]■[/red] Error messages")
        console.print("  ■ Info messages")

    phases_completed = 0
    total_phases = 5
    start_time = time.time()

    # Phase 1: Basic Server Test
    logger.phase(1, "Basic Server Startup Test")
    try:
        # Test if command runs at all
        test_cmd = command + (["echo", "Server OK"] if "docker" in command[0] else [])
        logger.command([*test_cmd[:3], "..."] if len(test_cmd) > 3 else test_cmd)
        # NOTE: only the executable itself is probed (command[:1]); the command
        # logged above is informational.
        result = subprocess.run(  # noqa: S603, ASYNC221
            command[:1],
            # Do not let the probe inherit (and block on, or consume) our stdin:
            # an interactive executable would otherwise hang until the timeout.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            timeout=2,
            encoding="utf-8",
            errors="replace",
        )
        # A usage message on stderr still proves that the executable exists.
        if result.returncode == 0 or "usage" in result.stderr.lower():
            logger.success("Command executable found")
            phases_completed = 1
        else:
            logger.error(f"Command failed with exit code {result.returncode}")
            if result.stderr:
                logger._log(
                    f"Error output: {result.stderr}", Colors.RED if logger.print_output else ""
                )
                hint = analyze_error_for_hints(result.stderr)
                if hint:
                    logger.hint(hint)
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Check if we should stop here
        if max_phase <= 1:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed
    except FileNotFoundError:
        logger.error(f"Command not found: {command[0]}")
        logger.hint("Ensure the command is installed and in PATH")
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed
    except Exception as e:
        logger.error(f"Startup test failed: {e}")
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed

    # Phase 2: MCP Initialize Test
    logger.phase(2, "MCP Server Initialize Test")
    logger.info("STDIO is used for MCP protocol, STDERR for server logs")
    init_request = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {
            "protocolVersion": "2024-11-05",
            "capabilities": {"roots": {"listChanged": True}},
            "clientInfo": {"name": "DebugClient", "version": "1.0.0"},
        },
    }
    proc = None
    try:
        logger.command(command)
        logger.stdio(f"Sending: {json.dumps(init_request)}")
        proc = subprocess.Popen(  # noqa: S603, ASYNC220
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
            encoding="utf-8",
            errors="replace",  # Replace invalid chars with � on Windows
        )
        # Ensure pipes are available
        if proc.stdin is None or proc.stdout is None or proc.stderr is None:
            raise RuntimeError("Failed to create subprocess pipes")

        # Send initialize
        proc.stdin.write(json.dumps(init_request) + "\n")
        proc.stdin.flush()

        # Collect stderr in background so a chatty server cannot fill the pipe
        stderr_lines: list[str] = []

        def read_stderr() -> None:
            if proc is None or proc.stderr is None:
                return
            for line in proc.stderr:
                line = line.rstrip()
                if line:
                    logger.stderr(line)
                    stderr_lines.append(line)

        stderr_thread = threading.Thread(target=read_stderr)
        stderr_thread.daemon = True
        stderr_thread.start()

        # Wait for response. readline() blocks, so the 15 second limit is only
        # re-checked between lines.
        response = None
        stdout_closed = False
        deadline = time.time() + 15
        while time.time() < deadline:
            line = proc.stdout.readline()
            if not line:
                # EOF: the server closed stdout (usually because it exited).
                # readline() would keep returning "" immediately, so stop here
                # instead of spinning until the deadline.
                stdout_closed = True
                break
            try:
                message = json.loads(line)
                # Only the reply to our request (id 1) counts as the response;
                # other messages are skipped.
                if message.get("id") == 1:
                    response = message
                    logger.stdio(f"Received: {json.dumps(response)}")
                    break
            except Exception as e:
                logger.error(f"Failed to parse MCP response: {e}")
                continue

        if response and "result" in response:
            logger.success("MCP server initialized successfully")
            server_info = response["result"].get("serverInfo", {})
            logger.info(
                f"Server: {server_info.get('name', 'Unknown')} v{server_info.get('version', '?')}"
            )
            # Show capabilities
            caps = response["result"].get("capabilities", {})
            if caps:
                logger.info(f"Capabilities: {', '.join(caps.keys())}")
            phases_completed = 2
        else:
            logger.error("No valid MCP response received")
            if stdout_closed:
                # Give the reader thread a moment to drain what the server
                # wrote to stderr before it went away.
                stderr_thread.join(timeout=2)
            # Analyze stderr for hints
            if stderr_lines:
                all_stderr = "\n".join(stderr_lines)
                hint = analyze_error_for_hints(all_stderr)
                if hint:
                    logger.hint(hint)
            else:
                logger.hint("""MCP requires clean stdout. Ensure:
   - All print() statements use file=sys.stderr
   - Logging is configured to use stderr
   - No libraries are printing to stdout""")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed  # the finally clause stops the server

        _stop_process(proc)

        # Check if we should stop here
        if phases_completed >= max_phase:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed
    except Exception as e:
        logger.error(f"MCP test failed: {e}")
        hint = analyze_error_for_hints(str(e))
        if hint:
            logger.hint(hint)
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed
    finally:
        # Never leave the spawned server running, whichever path exits the phase.
        if proc is not None and proc.poll() is None:
            _stop_process(proc)

    # Phase 3: Tool Discovery
    logger.phase(3, "MCP Tool Discovery Test")
    client = None
    try:
        # Create MCP config for the command
        mcp_config = {
            "test": {"command": command[0], "args": command[1:] if len(command) > 1 else []}
        }
        logger.command(command)
        logger.info("Creating MCP client via hud...")
        client = MCPClient(mcp_config=mcp_config, verbose=False, auto_trace=False)
        await client.initialize()

        # Wait for initialization
        logger.info("Waiting for server initialization...")
        await asyncio.sleep(5)

        # Get tools
        tools = await client.list_tools()
        if tools:
            logger.success(f"Found {len(tools)} tools")
            # Check for lifecycle tools
            tool_names = [t.name for t in tools]
            has_setup = "setup" in tool_names
            has_evaluate = "evaluate" in tool_names
            logger.info(
                f"Lifecycle tools: setup={'✅' if has_setup else '❌'}, evaluate={'✅' if has_evaluate else '❌'}"  # noqa: E501
            )
            # Check for interaction tools
            interaction_tools = [
                name
                for name in tool_names
                if name in ["computer", "playwright", "click", "type", "interact", "move"]
            ]
            if interaction_tools:
                logger.info(f"Interaction tools: {', '.join(interaction_tools)}")
            # List all tools
            logger.info(f"All tools: {', '.join(tool_names)}")
            # Try to list resources (a failure here does not fail the phase)
            try:
                resources = await client.list_resources()
                if resources:
                    logger.info(
                        f"Found {len(resources)} resources: {', '.join(str(r.uri) for r in resources[:3])}..."  # noqa: E501
                    )
            except Exception as e:
                logger.error(f"Failed to list resources: {e}")
            phases_completed = 3
        else:
            logger.error("No tools found")
            logger.hint("""No tools found. Ensure:
   - @mcp.tool() decorator is used on functions
   - Tools are registered before mcp.run()
   - No import errors preventing tool registration""")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Check if we should stop here
        if phases_completed >= max_phase:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Phase 4: Remote Deployment Readiness
        logger.phase(4, "Remote Deployment Readiness")
        # Test if setup/evaluate exist; their failures are reported as info only
        if "setup" in tool_names:
            try:
                logger.info("Testing setup tool...")
                await client.call_tool(name="setup", arguments={})
                logger.success("Setup tool responded")
            except Exception as e:
                logger.info(f"Setup tool test: {e}")
        if "evaluate" in tool_names:
            try:
                logger.info("Testing evaluate tool...")
                await client.call_tool(name="evaluate", arguments={})
                logger.success("Evaluate tool responded")
            except Exception as e:
                logger.info(f"Evaluate tool test: {e}")

        # Performance check (measured from the start of phase 1)
        init_time = time.time() - start_time
        logger.info(f"Total initialization time: {init_time:.2f}s")
        if init_time > 30:
            logger.error("Initialization took >30s - may be too slow")
            logger.hint("Consider optimizing startup time")
        phases_completed = 4

        # Check if we should stop here
        if phases_completed >= max_phase:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Phase 5: Concurrent Clients
        logger.phase(5, "Concurrent Clients Testing")
        concurrent_clients = []
        try:
            logger.info("Creating 3 concurrent MCP clients...")
            for i in range(3):
                client_config = {
                    f"test_concurrent_{i}": {
                        "command": command[0],
                        "args": command[1:] if len(command) > 1 else [],
                    }
                }
                concurrent_client = MCPClient(
                    mcp_config=client_config, verbose=False, auto_trace=False
                )
                await concurrent_client.initialize()
                concurrent_clients.append(concurrent_client)
                logger.info(f"Client {i + 1} connected")
            logger.success("All concurrent clients connected")

            # Clean shutdown. Each client is removed from the list before its
            # shutdown is attempted so the finally clause below only touches
            # clients that were never shut down, instead of closing every
            # client a second time.
            client_number = 0
            while concurrent_clients:
                c = concurrent_clients.pop(0)
                client_number += 1
                await c.shutdown()
                logger.info(f"Client {client_number} disconnected")
            phases_completed = 5
        except Exception as e:
            logger.error(f"Concurrent test failed: {e}")
        finally:
            # Best-effort cleanup of clients left over after a failure
            for c in concurrent_clients:
                try:
                    await c.shutdown()
                except Exception as e:
                    logger.error(f"Failed to close client: {e}")
    except Exception as e:
        logger.error(f"Tool discovery failed: {e}")
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed
    finally:
        # Ensure client is closed even on exceptions
        if client:
            try:
                await client.shutdown()
            except Exception:
                logger.error("Failed to close client")

    logger.progress_bar(phases_completed, total_phases)
    return phases_completed

hud/cli/docker_utils.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""Docker utilities for HUD CLI."""
+from __future__ import annotations
+import json
+import subprocess
def get_docker_cmd(image: str) -> list[str] | None:
    """
    Extract the CMD from a Docker image.

    Runs ``docker inspect <image>`` and reads ``Config.Cmd`` from the first
    entry of the JSON output.

    Args:
        image: Docker image name

    Returns:
        List of command parts, or None if the image cannot be inspected
        (docker missing or failing, unparsable output) or has no CMD
    """
    try:
        result = subprocess.run(  # noqa: S603
            ["docker", "inspect", image],  # noqa: S607
            capture_output=True,
            text=True,
            check=True,
        )
        inspect_data = json.loads(result.stdout)
    except (OSError, subprocess.CalledProcessError, json.JSONDecodeError):
        # OSError covers a missing or non-executable docker binary, which
        # previously escaped as FileNotFoundError instead of yielding None.
        return None

    # Only a non-empty JSON array whose first entry is an object is usable.
    if not isinstance(inspect_data, list) or not inspect_data:
        return None
    first_entry = inspect_data[0]
    if not isinstance(first_entry, dict):
        return None

    # "Config" and "Cmd" may be present but null; treat that like absent.
    config = first_entry.get("Config") or {}
    if not isinstance(config, dict):
        return None
    cmd = config.get("Cmd")
    return cmd if cmd else None
def inject_supervisor(cmd: list[str]) -> list[str]:
    """
    Inject watchfiles CLI supervisor into a Docker CMD.

    For ``sh -c`` / ``bash -c`` commands, the command following the last
    ``exec`` is wrapped; if the script contains no ``exec``, the whole script is
    wrapped. Direct (exec-form) commands are wrapped as a whole.

    The wrapped command is passed to watchfiles as one single-quoted shell word.
    Embedded single quotes are escaped, and exec-form arguments are joined with
    ``shlex.join`` so arguments containing spaces or quotes keep their
    boundaries.

    Args:
        cmd: Original Docker CMD

    Returns:
        Modified CMD with watchfiles supervisor injected (``cmd`` itself when
        it is empty)
    """
    import shlex  # function-scope import: the module does not import shlex at file level

    def as_shell_word(text: str) -> str:
        # Always single-quote (keeps output identical to the previous format for
        # plain commands); an embedded ' is written as '"'"' so it cannot end
        # the quoted word early.
        return "'" + text.replace("'", "'\"'\"'") + "'"

    def supervise(target: str) -> str:
        return f"watchfiles --verbose {as_shell_word(target)} /app/src"

    if not cmd:
        return cmd

    # Handle shell commands that might have background processes
    if cmd[0] in ("sh", "bash") and len(cmd) >= 3 and cmd[1] == "-c":
        shell_cmd = cmd[2]
        # Look for the last 'exec' in the shell command - that is the final command
        prefix, found, exec_target = shell_cmd.rpartition(" exec ")
        if found:
            # Replace only the exec'd command with watchfiles
            return [cmd[0], cmd[1], f"{prefix} exec {supervise(exec_target.strip())}"]
        stripped = shell_cmd.lstrip()
        if stripped.startswith("exec "):
            # The script *starts* with exec (no leading space to match above):
            # keep the exec and wrap only what follows it, so that the shell
            # builtin is not handed to watchfiles as if it were a program.
            return [cmd[0], cmd[1], f"exec {supervise(stripped[len('exec '):].strip())}"]
        # No exec, the whole thing is the command
        return ["sh", "-c", supervise(shell_cmd)]

    # Direct command - wrap with watchfiles, preserving argument boundaries
    return ["sh", "-c", supervise(shlex.join(cmd))]
def image_exists(image_name: str) -> bool:
    """
    Check if a Docker image exists locally.

    Runs ``docker image inspect <image_name>`` with its output discarded.

    Args:
        image_name: Docker image name

    Returns:
        True if the inspect command exits with status 0; False otherwise,
        including when the docker executable cannot be started
    """
    try:
        result = subprocess.run(  # noqa: S603
            ["docker", "image", "inspect", image_name],  # noqa: S607
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # docker is not installed / not executable: no image can exist locally.
        return False
    return result.returncode == 0

hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl