PyPI - hud-python - Versions diffs - 0.4.16__tar.gz → 0.4.18__tar.gz - Mend

hud-python 0.4.16__tar.gz → 0.4.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (183) hide show

{hud_python-0.4.16 → hud_python-0.4.18}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.16
+Version: 0.4.18
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.4.16 → hud_python-0.4.18}/hud/agents/claude.py RENAMED Viewed

@@ -85,8 +85,8 @@ class ClaudeAgent(MCPAgent):
         self._claude_to_mcp_tool_map: dict[str, str] = {}
         self.claude_tools: list[dict] = []
-        # Base system prompt for autonomous operation
-        self.system_prompt = """
+        # Append Claude-specific instructions to the base system prompt
+        claude_instructions = """
         You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
         When working on tasks:
@@ -99,6 +99,12 @@ class ClaudeAgent(MCPAgent):
         Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
         """.strip()  # noqa: E501
+        # Append Claude instructions to any base system prompt
+        if self.system_prompt:
+            self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
+        else:
+            self.system_prompt = claude_instructions
     async def initialize(self, task: str | Task | None = None) -> None:
         """Initialize the agent and build tool mappings."""
         await super().initialize(task)

{hud_python-0.4.16 → hud_python-0.4.18}/hud/agents/misc/response_agent.py RENAMED Viewed

@@ -54,7 +54,7 @@ class ResponseAgent:
         """
         try:
             response = await self.client.chat.completions.create(
-                model="gpt-4o",
+                model="gpt-5-nano",
                 messages=[
                     {"role": "system", "content": self.system_prompt},
                     {

{hud_python-0.4.16 → hud_python-0.4.18}/hud/agents/openai.py RENAMED Viewed

@@ -78,8 +78,8 @@ class OperatorAgent(MCPAgent):
         self.model_name = "openai-" + self.model
-        # Base system prompt for autonomous operation
-        self.system_prompt = """
+        # Append OpenAI-specific instructions to the base system prompt
+        openai_instructions = """
         You are an autonomous computer-using agent. Follow these guidelines:
         1. NEVER ask for confirmation. Complete all tasks autonomously.
@@ -93,6 +93,12 @@ class OperatorAgent(MCPAgent):
         Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
         """.strip()  # noqa: E501
+        # Append OpenAI instructions to any base system prompt
+        if self.system_prompt:
+            self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
+        else:
+            self.system_prompt = openai_instructions
     async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
         """
         Run the agent with the given prompt or task.

hud_python-0.4.18/hud/agents/openai_chat_generic.py ADDED Viewed

@@ -0,0 +1,288 @@
+"""Generic OpenAI chat-completions agent.
+This class provides the minimal glue required to connect any endpoint that
+implements the OpenAI compatible *chat.completions* API with MCP tool calling
+through the existing :class:`hud.agent.MCPAgent` scaffolding.
+Key points:
+- Stateless, no special server-side conversation state is assumed.
+- Accepts an :class:`openai.AsyncOpenAI` client, caller can supply their own
+  base_url / api_key (e.g. ART, llama.cpp, together.ai, …)
+- All HUD features (step_count, OTel spans, tool filtering, screenshots, …)
+  come from the ``MCPAgent`` base class, we only implement the three abstract
+  methods
+"""
+from __future__ import annotations
+import json
+import logging
+from typing import TYPE_CHECKING, Any, cast
+import mcp.types as types
+from hud import instrument
+from hud.types import AgentResponse, MCPToolCall, MCPToolResult
+from .base import MCPAgent
+if TYPE_CHECKING:
+    from openai import AsyncOpenAI
+    from openai.types.chat import ChatCompletionToolParam
+    from hud.clients import AgentMCPClient
+logger = logging.getLogger(__name__)
+class GenericOpenAIChatAgent(MCPAgent):
+    """MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
+    def __init__(
+        self,
+        mcp_client: AgentMCPClient,
+        *,
+        openai_client: AsyncOpenAI,
+        model_name: str = "gpt-4o-mini",
+        parallel_tool_calls: bool = False,
+        logprobs: bool = False,
+        **agent_kwargs: Any,
+    ) -> None:
+        super().__init__(mcp_client=mcp_client, **agent_kwargs)
+        self.oai = openai_client
+        self.model_name = model_name
+        self.parallel_tool_calls = parallel_tool_calls
+        self.logprobs = logprobs
+        self.conversation_history = []
+    @staticmethod
+    def _oai_to_mcp(tool_call: Any) -> MCPToolCall:  # type: ignore[valid-type]
+        """Convert an OpenAI ``tool_call`` to :class:`MCPToolCall`."""
+        return MCPToolCall(
+            id=tool_call.id,
+            name=tool_call.function.name,
+            arguments=json.loads(tool_call.function.arguments or "{}"),
+        )
+    async def get_system_messages(self) -> list[Any]:
+        """Get system messages for OpenAI."""
+        return [{"role": "system", "content": self.system_prompt}]
+    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
+        """Format blocks for OpenAI."""
+        content = []
+        for block in blocks:
+            if isinstance(block, types.TextContent):
+                content.append({"type": "text", "text": block.text})
+            elif isinstance(block, types.ImageContent):
+                content.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:{block.mimeType};base64,{block.data}"},
+                    }
+                )
+        return [{"role": "user", "content": content}]
+    def _sanitize_schema_for_openai(self, schema: dict) -> dict:
+        """Convert MCP JSON Schema to OpenAI-compatible format.
+        Handles unsupported features like anyOf and prefixItems.
+        """
+        if not isinstance(schema, dict):
+            return schema
+        sanitized = {}
+        for key, value in schema.items():
+            if key == "anyOf" and isinstance(value, list):
+                # Handle anyOf patterns (usually for nullable fields)
+                non_null_types = [
+                    v for v in value if not (isinstance(v, dict) and v.get("type") == "null")
+                ]
+                if non_null_types:
+                    # Use the first non-null type
+                    sanitized.update(self._sanitize_schema_for_openai(non_null_types[0]))
+                else:
+                    sanitized["type"] = "string"  # Fallback
+            elif key == "prefixItems":
+                # Convert prefixItems to simple items
+                sanitized["type"] = "array"
+                if isinstance(value, list) and value:
+                    # Use the type from the first item as the items schema
+                    first_item = value[0]
+                    if isinstance(first_item, dict):
+                        sanitized["items"] = {"type": first_item.get("type", "string")}
+                    else:
+                        sanitized["items"] = {"type": "string"}
+            elif key == "properties" and isinstance(value, dict):
+                # Recursively sanitize property schemas
+                sanitized[key] = {
+                    prop_name: self._sanitize_schema_for_openai(prop_schema)
+                    for prop_name, prop_schema in value.items()
+                }
+            elif key == "items" and isinstance(value, dict):
+                # Recursively sanitize items schema
+                sanitized[key] = self._sanitize_schema_for_openai(value)
+            elif key in (
+                "type",
+                "description",
+                "enum",
+                "required",
+                "default",
+                "minimum",
+                "maximum",
+                "minItems",
+                "maxItems",
+            ):
+                # These are supported by OpenAI
+                sanitized[key] = value
+        return sanitized or {"type": "object"}
+    def get_tool_schemas(self) -> list[dict]:
+        tool_schemas = super().get_tool_schemas()
+        openai_tools = []
+        for schema in tool_schemas:
+            parameters = schema.get("parameters", {})
+            if parameters:
+                sanitized_params = self._sanitize_schema_for_openai(parameters)
+            else:
+                sanitized_params = {"type": "object", "properties": {}}
+            openai_tool = {
+                "type": "function",
+                "function": {
+                    "name": schema["name"],
+                    "description": schema.get("description", ""),
+                    "parameters": sanitized_params,
+                },
+            }
+            openai_tools.append(openai_tool)
+        return openai_tools
+    @instrument(
+        span_type="agent",
+        record_args=False,
+        record_result=True,
+    )
+    async def get_response(self, messages: list[Any]) -> AgentResponse:
+        """Send chat request to OpenAI and convert the response."""
+        # Convert MCP tool schemas to OpenAI format
+        mcp_schemas = self.get_tool_schemas()
+        response = await self.oai.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
+            parallel_tool_calls=self.parallel_tool_calls,
+            logprobs=self.logprobs,
+        )
+        choice = response.choices[0]
+        msg = choice.message
+        assistant_msg: dict[str, Any] = {"role": "assistant"}
+        if msg.content:
+            assistant_msg["content"] = msg.content
+        if msg.tool_calls:
+            assistant_msg["tool_calls"] = msg.tool_calls
+        messages.append(assistant_msg)
+        # Store the complete conversation history
+        self.conversation_history = messages.copy()
+        tool_calls = []
+        if msg.tool_calls:
+            for tc in msg.tool_calls:
+                if tc.function.name is not None:  # type: ignore
+                    tool_calls.append(self._oai_to_mcp(tc))
+                    if not self.parallel_tool_calls:
+                        break
+        return AgentResponse(
+            content=msg.content or "",
+            tool_calls=tool_calls,
+            done=choice.finish_reason in ("stop", "length"),
+            raw=response,  # Include raw response for access to Choice objects
+        )
+    async def format_tool_results(
+        self,
+        tool_calls: list[MCPToolCall],
+        tool_results: list[MCPToolResult],
+    ) -> list[Any]:
+        """Render MCP tool results as OpenAI messages.
+        Note: OpenAI tool messages only support string content.
+        When images are present, we return both a tool message and a user message.
+        """
+        rendered: list[dict[str, Any]] = []
+        for call, res in zip(tool_calls, tool_results, strict=False):
+            # Use structuredContent.result if available, otherwise use content
+            items = res.content
+            if res.structuredContent and isinstance(res.structuredContent, dict):
+                items = res.structuredContent.get("result", res.content)
+            # Separate text and image content
+            text_parts = []
+            image_parts = []
+            for item in items:
+                if isinstance(item, dict):
+                    if item.get("type") == "text":
+                        text_parts.append(item.get("text", ""))
+                    elif item.get("type") == "image":
+                        mime_type = item.get("mimeType", "image/png")
+                        data = item.get("data", "")
+                        image_parts.append(
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{mime_type};base64,{data}"
+                                },
+                            }
+                        )
+                elif isinstance(item, types.TextContent):
+                    text_parts.append(item.text)
+                elif isinstance(item, types.ImageContent):
+                    image_parts.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:{item.mimeType};base64,{item.data}"},
+                        }
+                    )
+            text_content = "".join(text_parts) if text_parts else "Tool executed successfully"
+            rendered.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": call.id,
+                    "content": text_content,
+                }
+            )
+            # If there are images, add them as a separate user message
+            if image_parts:
+                # Add a user message with the images
+                content_with_images = [
+                    {"type": "text", "text": "Tool returned the following:"},
+                    *image_parts
+                ]
+                rendered.append(
+                    {
+                        "role": "user",
+                        "content": content_with_images,
+                    }
+                )
+        return rendered

{hud_python-0.4.16 → hud_python-0.4.18}/hud/cli/rl/__init__.py RENAMED Viewed

@@ -23,7 +23,10 @@ def rl_main(
     ctx: typer.Context,
     model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
     dataset: str | None = typer.Option(
-        None, "--dataset", "-d", help="Override dataset from lock file"
+        None,
+        "--dataset",
+        "-d",
+        help="Dataset: JSON file path or HuggingFace name (auto-detects if not provided)",
     ),
     config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),  # noqa: B008
     gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
@@ -39,9 +42,15 @@ def rl_main(
     3. Push environment to registry if needed
     4. Start remote training on Prime Intellect
+    Dataset can be:
+    - A local JSON file with tasks (e.g., tasks.json)
+    - A HuggingFace dataset name (e.g., 'username/dataset-name')
+    - Auto-detected from current directory if not specified
     Examples:
-        hud rl                    # Interactive mode with prompts
+        hud rl                    # Interactive mode, auto-detect tasks.json
         hud rl --model gpt2       # Train with specific model
+        hud rl --dataset tasks.json  # Use local task file
         hud rl --gpus 4xH100      # Use different GPU configuration
         hud rl init my-env:latest # Generate config for environment
     """

{hud_python-0.4.16 → hud_python-0.4.18}/hud/cli/rl/pod.py RENAMED Viewed

@@ -62,6 +62,7 @@ async def create_and_connect_prime_pod(
     image: str,
     team_id: str | None = None,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Create a Prime Intellect pod and connect to it for training."""
     design.section_title("🌐 Creating Prime Intellect Pod")
@@ -330,6 +331,7 @@ async def create_and_connect_prime_pod(
                     output_dir=output_dir,
                     image=image,
                     dataset_size=dataset_size,
+                    is_json_file=is_json_file,
                 )
             else:
                 # Manual fallback
@@ -457,6 +459,7 @@ async def run_prime_training(
     auto_create_pod: str | None = None,
     team_id: str | None = None,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Run training on Prime Intellect infrastructure."""
     # Check API key
@@ -488,4 +491,5 @@ async def run_prime_training(
         image=image,
         team_id=team_id,
         dataset_size=dataset_size,
+        is_json_file=is_json_file,
     )

{hud_python-0.4.16 → hud_python-0.4.18}/hud/cli/rl/ssh.py RENAMED Viewed

@@ -101,6 +101,7 @@ async def connect_and_train(
     output_dir: Path,
     image: str,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Connect to the pod via SSH and run training commands."""
     design.section_title("🚀 Starting Remote Training")
@@ -175,6 +176,37 @@ async def connect_and_train(
             design.info("Make sure scp is installed and in your PATH")
         raise typer.Exit(1) from e
+    # If dataset is a JSON file, copy it too
+    remote_dataset = dataset  # Default to unchanged
+    if is_json_file:
+        design.info("Copying task file to pod...")
+        try:
+            # On Windows, we need to ensure proper path formatting
+            dataset_path = str(dataset).replace("\\", "/")
+            # Extract just the filename for the remote path
+            dataset_filename = os.path.basename(dataset)
+            remote_dataset = f"/root/{dataset_filename}"
+            scp_cmd = [
+                "scp",
+                "-i",
+                str(ssh_key_path),
+                "-P",
+                ssh_port,
+                "-o",
+                "StrictHostKeyChecking=no",
+                "-o",
+                "UserKnownHostsFile=/dev/null",
+                dataset_path,
+                f"{ssh_user_host}:{remote_dataset}",
+            ]
+            design.debug(f"Running: {' '.join(scp_cmd)}")
+            subprocess.run(scp_cmd, check=True)  # noqa: S603, ASYNC221
+            design.success(f"Task file copied to {remote_dataset}")
+        except subprocess.CalledProcessError as e:
+            design.error(f"Failed to copy task file: {e}")
+            raise typer.Exit(1) from e
     design.info("Setting up environment and starting training...")
     design.info("This will take a few minutes for initial setup, then training will begin.")
     design.info("")
@@ -196,7 +228,7 @@ async def connect_and_train(
         "# Load environment",
         "env = vf.load_environment(",
         '    env_id="hud-vf-gym",',
-        f'    taskset="{dataset}",',
+        f'    taskset="{remote_dataset}",',
         '    config_path="/root/config.yaml",',
         f"    num_tasks={dataset_size},",
         ")",
@@ -242,7 +274,7 @@ async def connect_and_train(
         "uv venv --python 3.12 && "
         "source .venv/bin/activate && "
         # Install packages
-        "prime env install hud/hud-vf-gym@0.1.0 && "
+        "prime env install hud/hud-vf-gym@0.1.1 && "
         "uv pip install 'verifiers[train]' && "
         "uv pip install flash-attn --no-build-isolation && "
         # Set environment variables

hud-python 0.4.16__tar.gz → 0.4.18__tar.gz

Potentially problematic release.

hud-python 0.4.16__tar.gz → 0.4.18__tar.gz