PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (282) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +70 -5
hud/agents/base.py +238 -500
hud/agents/claude.py +236 -247
hud/agents/gateway.py +42 -0
hud/agents/gemini.py +264 -0
hud/agents/gemini_cua.py +324 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +48 -36
hud/agents/openai.py +282 -296
hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
hud/agents/operator.py +199 -0
hud/agents/resolver.py +70 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +381 -214
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +377 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_resolver.py +192 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/agents/types.py +148 -0
hud/cli/__init__.py +493 -546
hud/cli/analyze.py +43 -5
hud/cli/build.py +699 -113
hud/cli/debug.py +8 -5
hud/cli/dev.py +889 -732
hud/cli/eval.py +793 -667
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/pull.py +1 -1
hud/cli/push.py +38 -13
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +110 -8
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push.py +1 -1
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +70 -1
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +45 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +326 -0
hud/datasets/runner.py +198 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +52 -0
hud/environment/connection.py +258 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +137 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +835 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +263 -0
hud/environment/scenarios.py +620 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +205 -0
hud/environment/tests/test_environment.py +593 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +242 -0
hud/environment/tests/test_scenarios.py +1086 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +727 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +187 -0
hud/eval/manager.py +533 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +372 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +291 -0
hud/eval/types.py +65 -0
hud/eval/utils.py +194 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +308 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +165 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +18 -2
hud/tools/agent.py +223 -0
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +36 -3
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_agent_tool.py +355 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +194 -56
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +89 -18
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.13.dist-info/METADATA +264 -0
hud_python-0.5.13.dist-info/RECORD +305 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0

hud/tools/tests/test_types.py ADDED Viewed

@@ -0,0 +1,193 @@
+from __future__ import annotations
+import pytest
+from mcp.types import ImageContent, TextContent
+from hud.tools.types import ContentResult, EvaluationResult, ToolError
+def test_evaluation_result_defaults():
+    """Test EvaluationResult with default values."""
+    result = EvaluationResult()
+    assert result.reward == 0.0
+    assert result.done is False
+    assert result.content is None
+    assert result.info == {}
+    assert result.isError is False
+def test_evaluation_result_with_values():
+    """Test EvaluationResult with custom values."""
+    result = EvaluationResult(
+        reward=0.95,
+        done=True,
+        content="Task completed successfully",
+        info={"steps": 5},
+        isError=False,
+    )
+    assert result.reward == 0.95
+    assert result.done is True
+    assert result.content == "Task completed successfully"
+    assert result.info == {"steps": 5}
+    assert result.isError is False
+def test_content_result_defaults():
+    """Test ContentResult with default values."""
+    result = ContentResult()
+    assert result.output is None
+    assert result.error is None
+    assert result.base64_image is None
+    assert result.system is None
+def test_content_result_with_values():
+    """Test ContentResult with custom values."""
+    result = ContentResult(
+        output="Command executed",
+        error="No errors",
+        base64_image="base64data",
+        system="System message",
+    )
+    assert result.output == "Command executed"
+    assert result.error == "No errors"
+    assert result.base64_image == "base64data"
+    assert result.system == "System message"
+def test_content_result_add_both_output():
+    """Test adding two ContentResults with output."""
+    result1 = ContentResult(output="Part 1")
+    result2 = ContentResult(output=" Part 2")
+    combined = result1 + result2
+    assert combined.output == "Part 1 Part 2"
+    assert combined.error is None
+    assert combined.base64_image is None
+def test_content_result_add_both_error():
+    """Test adding two ContentResults with errors."""
+    result1 = ContentResult(error="Error 1")
+    result2 = ContentResult(error=" Error 2")
+    combined = result1 + result2
+    assert combined.error == "Error 1 Error 2"
+    assert combined.output is None
+def test_content_result_add_both_system():
+    """Test adding two ContentResults with system messages."""
+    result1 = ContentResult(system="System 1")
+    result2 = ContentResult(system=" System 2")
+    combined = result1 + result2
+    assert combined.system == "System 1 System 2"
+def test_content_result_add_one_sided():
+    """Test adding ContentResults where only one has values."""
+    result1 = ContentResult(output="Output")
+    result2 = ContentResult(error="Error")
+    combined = result1 + result2
+    assert combined.output == "Output"
+    assert combined.error == "Error"
+def test_content_result_add_images_raises_error():
+    """Test that combining two results with images raises an error."""
+    result1 = ContentResult(base64_image="image1")
+    result2 = ContentResult(base64_image="image2")
+    with pytest.raises(ValueError, match="Cannot combine tool results"):
+        _ = result1 + result2
+def test_content_result_add_one_image():
+    """Test adding ContentResults where only one has an image."""
+    result1 = ContentResult(base64_image="image1")
+    result2 = ContentResult(output="Output")
+    combined = result1 + result2
+    assert combined.base64_image == "image1"
+    assert combined.output == "Output"
+def test_content_result_to_content_blocks_output():
+    """Test converting ContentResult with output to content blocks."""
+    result = ContentResult(output="Test output")
+    blocks = result.to_content_blocks()
+    assert len(blocks) == 1
+    assert isinstance(blocks[0], TextContent)
+    assert blocks[0].text == "Test output"
+def test_content_result_to_content_blocks_error():
+    """Test converting ContentResult with error to content blocks."""
+    result = ContentResult(error="Test error")
+    blocks = result.to_content_blocks()
+    assert len(blocks) == 1
+    assert isinstance(blocks[0], TextContent)
+    assert blocks[0].text == "Test error"
+def test_content_result_to_content_blocks_image():
+    """Test converting ContentResult with image to content blocks."""
+    result = ContentResult(base64_image="base64data")
+    blocks = result.to_content_blocks()
+    assert len(blocks) == 1
+    assert isinstance(blocks[0], ImageContent)
+    assert blocks[0].data == "base64data"
+    assert blocks[0].mimeType == "image/png"
+def test_content_result_to_content_blocks_all():
+    """Test converting ContentResult with all fields to content blocks."""
+    result = ContentResult(
+        output="Output",
+        error="Error",
+        base64_image="image",
+    )
+    blocks = result.to_content_blocks()
+    assert len(blocks) == 3
+    assert isinstance(blocks[0], TextContent)
+    assert blocks[0].text == "Output"
+    assert isinstance(blocks[1], TextContent)
+    assert blocks[1].text == "Error"
+    assert isinstance(blocks[2], ImageContent)
+    assert blocks[2].data == "image"
+def test_content_result_to_content_blocks_empty():
+    """Test converting empty ContentResult to content blocks."""
+    result = ContentResult()
+    blocks = result.to_content_blocks()
+    assert len(blocks) == 0
+def test_tool_error():
+    """Test ToolError exception."""
+    error = ToolError("Test error message")
+    assert isinstance(error, Exception)
+    assert str(error) == "Test error message"

hud/tools/types.py CHANGED Viewed

@@ -6,6 +6,18 @@ from mcp.types import ContentBlock, ImageContent, TextContent
 from pydantic import BaseModel, ConfigDict, Field
+class Coordinate(BaseModel):
+    """A coordinate point with x and y values.
+    Used for path-based actions like drag operations.
+    """
+    model_config = ConfigDict(extra="forbid")
+    x: int = Field(..., description="X coordinate")
+    y: int = Field(..., description="Y coordinate")
 class EvaluationResult(BaseModel):
     """Standard evaluation result format."""
@@ -28,6 +40,7 @@ class ContentResult(BaseModel):
     error: str | None = Field(default=None, description="Error message")
     base64_image: str | None = Field(default=None, description="Base64-encoded image")
     system: str | None = Field(default=None, description="System message")
+    url: str | None = Field(default=None, description="Current page URL (for browser automation)")
     def __add__(self, other: ContentResult) -> ContentResult:
         def combine_fields(
@@ -44,6 +57,7 @@ class ContentResult(BaseModel):
             error=combine_fields(self.error, other.error),
             base64_image=combine_fields(self.base64_image, other.base64_image, False),
             system=combine_fields(self.system, other.system),
+            url=combine_fields(self.url, other.url, False),
         )
     def to_content_blocks(self) -> list[ContentBlock]:
@@ -55,7 +69,7 @@ class ContentResult(BaseModel):
             result: ContentResult to convert
         Returns:
-            List of ContentBlock
+            List of ContentBlock with URL embedded as metadata if available
         """
         blocks: list[ContentBlock] = []
@@ -65,6 +79,12 @@ class ContentResult(BaseModel):
             blocks.append(TextContent(text=self.error, type="text"))
         if self.base64_image:
             blocks.append(ImageContent(data=self.base64_image, mimeType="image/png", type="image"))
+        # Add URL as a special metadata text block (for Gemini Computer Use)
+        # Always include URL if set, even if it's a placeholder like "about:blank"
+        if self.url:
+            blocks.append(TextContent(text=f"__URL__:{self.url}", type="text"))
         return blocks

hud/types.py CHANGED Viewed

@@ -1,11 +1,9 @@
 from __future__ import annotations
-import contextlib
 import json
 import logging
 import uuid
-from collections import defaultdict
-from string import Template
+from enum import Enum
 from typing import Any, Literal
 import mcp.types as types
@@ -13,22 +11,134 @@ from mcp.types import CallToolRequestParams, CallToolResult
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 from hud.settings import settings
+from hud.utils.env import resolve_env_vars as _resolve_env_vars
 from hud.utils.tool_shorthand import normalize_to_tool_call_dict
 logger = logging.getLogger(__name__)
+# Guard to ensure we only log missing HUD_API_KEY once
+_missing_api_key_error_logged: bool = False
-class Task(BaseModel):
+class AgentType(str, Enum):
+    CLAUDE = "claude"
+    OPENAI = "openai"
+    OPERATOR = "operator"
+    GEMINI = "gemini"
+    GEMINI_CUA = "gemini_cua"
+    OPENAI_COMPATIBLE = "openai_compatible"
+    INTEGRATION_TEST = "integration_test"
+    @property
+    def cls(self) -> type:
+        if self == AgentType.CLAUDE:
+            from hud.agents.claude import ClaudeAgent
+            return ClaudeAgent
+        elif self == AgentType.OPENAI:
+            from hud.agents import OpenAIAgent
+            return OpenAIAgent
+        elif self == AgentType.OPERATOR:
+            from hud.agents import OperatorAgent
+            return OperatorAgent
+        elif self == AgentType.GEMINI:
+            from hud.agents.gemini import GeminiAgent
+            return GeminiAgent
+        elif self == AgentType.GEMINI_CUA:
+            from hud.agents.gemini_cua import GeminiCUAAgent
+            return GeminiCUAAgent
+        elif self == AgentType.OPENAI_COMPATIBLE:
+            from hud.agents.openai_chat import OpenAIChatAgent
+            return OpenAIChatAgent
+        elif self == AgentType.INTEGRATION_TEST:
+            from hud.agents.misc.integration_test_agent import IntegrationTestRunner
+            return IntegrationTestRunner
+        else:
+            raise ValueError(f"Unsupported agent type: {self}")
+    @property
+    def config_cls(self) -> type:
+        """Get config class without importing agent (avoids SDK dependency)."""
+        from hud.agents.types import (
+            ClaudeConfig,
+            GeminiConfig,
+            GeminiCUAConfig,
+            OpenAIChatConfig,
+            OpenAIConfig,
+            OperatorConfig,
+        )
+        mapping: dict[AgentType, type] = {
+            AgentType.CLAUDE: ClaudeConfig,
+            AgentType.OPENAI: OpenAIConfig,
+            AgentType.OPERATOR: OperatorConfig,
+            AgentType.GEMINI: GeminiConfig,
+            AgentType.GEMINI_CUA: GeminiCUAConfig,
+            AgentType.OPENAI_COMPATIBLE: OpenAIChatConfig,
+            AgentType.INTEGRATION_TEST: BaseAgentConfig,
+        }
+        if self not in mapping:
+            raise ValueError(f"Unsupported agent type for config: {self}")
+        return mapping[self]
+class BaseAgentConfig(BaseModel):
+    """Agent configuration for LLM-specific settings.
+    Note: allowed_tools, disallowed_tools, response_tool_name, append_setup_output,
+    and initial_screenshot are kept for backwards compatibility with v4 task configs
+    but are no longer applied at the agent level. These should be configured on the
+    Environment/Task instead.
+    """
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid", populate_by_name=True)
+    # LLM-specific setting
+    system_prompt: str | None = None
+    # Deprecated: kept for backwards compat with v4 task configs
+    # allowed_tools/disallowed_tools are applied at Environment level
+    # append_setup_output is applied by EvalContext -> agent
+    # response_tool_name and initial_screenshot are parsed but NOT implemented
+    allowed_tools: list[str] | None = None
+    disallowed_tools: list[str] | None = None
+    response_tool_name: str | None = None  # Not implemented
+    append_setup_output: bool = False
+    append_setup_tool: bool = False  # Alias for append_setup_output
+    initial_screenshot: bool = False  # Not implemented
+class LegacyTask(BaseModel):
     """
+    DEPRECATED: Use Task from env() instead.
     A task configuration that can be used to create a task.
     The mcp_config field supports environment variable substitution using
     template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
-    Example:
+    .. deprecated:: 0.5.0
+        LegacyTask is deprecated in v0.5.0 and will be removed in v0.6.0
+        (no earlier than March 1st, 2026).
+        Use one of these migration paths:
+        1. Quick conversion: ``Task.from_v4(legacy_task)`` converts LegacyTask to Task
+        2. Full migration: Use ``@env.scenario()`` with setup code before first yield
+           and evaluate code after first yield
+        See https://docs.hud.ai/migration for the full migration guide.
+    Example (deprecated):
         mcp_config: {
             "hud": {
-                "url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
+                "url": "${HUD_MCP_URL:https://mcp.hud.ai/v3/mcp}",
                 "headers": {
                     "Authorization": "Bearer ${HUD_API_KEY}",
                     "Mcp-Image": "your-mcp-image"
@@ -43,10 +153,23 @@ class Task(BaseModel):
     setup_tool: MCPToolCall | list[MCPToolCall] | None = None
     evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
     integration_test_tool: MCPToolCall | list[MCPToolCall] | None = None
-    agent_tools: list[str] | None = None
-    system_prompt: str | None = None
+    agent_config: BaseAgentConfig | None = None
     metadata: dict[str, Any] = Field(default_factory=dict)
+    def __init__(self, **data: Any) -> None:
+        """Initialize LegacyTask with deprecation warning."""
+        import warnings
+        warnings.warn(
+            "LegacyTask is deprecated in v0.5.0 and will be removed in v0.6.0 "
+            "(no earlier than March 1st, 2026). "
+            "Use Task.from_v4() for quick conversion, or migrate to @env.scenario(). "
+            "See https://docs.hud.ai/migration for details.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super().__init__(**data)
     @field_validator("mcp_config", "metadata", mode="before")
     @classmethod
     def parse_json_strings(cls, v: Any) -> Any:
@@ -60,6 +183,25 @@ class Task(BaseModel):
                 raise HudConfigError(f"Invalid JSON string: {e}") from e
         return v
+    @field_validator("agent_config", mode="before")
+    @classmethod
+    def parse_agent_config(cls, v: Any) -> BaseAgentConfig | None:
+        """Parse agent_config into BaseAgentConfig."""
+        if v is None:
+            return None
+        if isinstance(v, BaseAgentConfig):
+            return v
+        if isinstance(v, str):
+            try:
+                v = json.loads(v)
+            except json.JSONDecodeError as e:
+                from hud.shared.exceptions import HudConfigError
+                raise HudConfigError(f"Invalid JSON string for agent_config: {e}") from e
+        if isinstance(v, dict):
+            return BaseAgentConfig(**v)
+        return v
     @field_validator("setup_tool", "evaluate_tool", "integration_test_tool", mode="before")
     @classmethod
     def convert_dict_to_tool_call(cls, v: Any, info: Any) -> Any:
@@ -98,44 +240,21 @@ class Task(BaseModel):
     @classmethod
     def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
         """
-        Automatically resolve environment variables in mcp_config using Template.
+        Automatically resolve environment variables in mcp_config.
         Supports ${VAR_NAME} syntax with variable substitution from
-        System environment variables (including HUD_API_KEY, etc.)
+        system environment variables and settings (including HUD_API_KEY, etc.)
         Missing variables resolve to empty strings.
         """
-        import os
-        # Start with current environment variables
-        mapping = dict(os.environ)
-        # Include settings (from process env, project .env, and user .env)
-        settings_dict = settings.model_dump()
-        mapping.update(settings_dict)
-        # Add UPPERCASE aliases for settings keys
-        for _key, _val in settings_dict.items():
-            with contextlib.suppress(Exception):
-                mapping[_key.upper()] = _val
-        if settings.api_key:
-            mapping["HUD_API_KEY"] = settings.api_key
-        else:
-            logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
+        # Warn once if HUD_API_KEY is not set
+        if not settings.api_key:
+            global _missing_api_key_error_logged
+            if not _missing_api_key_error_logged:
+                logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
+                _missing_api_key_error_logged = True
-        def substitute_in_value(obj: Any) -> Any:
-            """Recursively substitute variables in nested structures."""
-            if isinstance(obj, str):
-                # Use Template's substitute with defaultdict - missing vars become empty strings
-                safe_mapping = defaultdict(str, mapping)
-                return Template(obj).substitute(safe_mapping)
-            elif isinstance(obj, dict):
-                return {k: substitute_in_value(v) for k, v in obj.items()}
-            elif isinstance(obj, list):
-                return [substitute_in_value(item) for item in obj]
-            else:
-                return obj
-        return substitute_in_value(v)
+        return _resolve_env_vars(v)
 class MCPToolCall(CallToolRequestParams):
@@ -164,7 +283,9 @@ class MCPToolCall(CallToolRequestParams):
 class MCPToolResult(CallToolResult):
-    """A tool result."""
+    """A tool result with optional call_id for correlation."""
+    call_id: str | None = None  # For correlating with provider-specific tool call IDs
     def _get_content_summary(self) -> str:
         """Extract a summary of the content."""
@@ -216,7 +337,7 @@ class AgentResponse(BaseModel):
     tool_calls: list[MCPToolCall] = Field(default_factory=list)
     done: bool = Field(default=False)
-    # --- TELEMETRY [hud.so] ---
+    # --- TELEMETRY [hud.ai] ---
     # Responses
     content: str | None = Field(default=None)
     reasoning: str | None = Field(default=None)
@@ -267,6 +388,27 @@ class TraceStep(BaseModel):
     model_config = ConfigDict(populate_by_name=True, extra="allow")
+class HudSpan(BaseModel):
+    """A telemetry span ready for export to HUD API."""
+    name: str
+    trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
+    span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
+    parent_span_id: str | None = Field(default=None, pattern=r"^[0-9a-fA-F]{16}$")
+    start_time: str  # ISO format
+    end_time: str  # ISO format
+    status_code: str  # "UNSET", "OK", "ERROR"
+    status_message: str | None = None
+    attributes: TraceStep
+    exceptions: list[dict[str, Any]] | None = None
+    internal_type: str | None = None
+    model_config = ConfigDict(extra="forbid")
 class Trace(BaseModel):
     """Unified result from agent execution (task or prompt).
@@ -286,7 +428,7 @@ class Trace(BaseModel):
     isError: bool = Field(default=False)
     # Metadata
-    task: Task | None = Field(default=None)
+    task: LegacyTask | None = Field(default=None)
     # Trace
     trace: list[TraceStep] = Field(default_factory=list)
@@ -302,26 +444,22 @@ class Trace(BaseModel):
     def append(self, step: TraceStep) -> None:
         self.trace.append(step)
-    def populate_from_context(self) -> None:
-        """Populate trace steps from the current trace context if available.
-        This checks if we're executing within a hud.trace() context and
-        automatically populates the trace field with collected steps.
-        """
-        from hud.otel.context import get_current_task_run_id
-        from hud.telemetry.replay import get_trace
-        task_run_id = get_current_task_run_id()
-        if task_run_id:
-            collected_trace = get_trace(task_run_id)
-            if collected_trace:
-                self.trace = collected_trace.trace
+# Re-export Task for backwards compatibility (after module defs to avoid circular import)
+from hud.eval.task import Task  # noqa: E402
+# Type alias for functions that accept v5 Task, v4 LegacyTask, or raw dicts
+TaskInput = Task | LegacyTask | dict[str, Any]
 __all__ = [
     "AgentResponse",
+    "AgentType",
+    "HudSpan",
+    "LegacyTask",
     "MCPToolCall",
     "MCPToolResult",
+    "Task",
+    "TaskInput",
     "Trace",
     "TraceStep",
 ]

hud/utils/__init__.py CHANGED Viewed

@@ -2,9 +2,11 @@ from __future__ import annotations
 from .hud_console import HUDConsole, hud_console
 from .telemetry import stream
+from .types import with_signature
 __all__ = [
     "HUDConsole",
     "hud_console",
     "stream",
+    "with_signature",
 ]

hud/utils/env.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Environment variable resolution utilities."""
+from __future__ import annotations
+import contextlib
+import os
+from collections import defaultdict
+from string import Template
+from typing import TYPE_CHECKING, Any
+from hud.settings import settings
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+def resolve_env_vars(obj: Any, extra_mapping: Mapping[str, Any] | None = None) -> Any:
+    """Recursively resolve ${VAR_NAME} placeholders in strings.
+    Uses Python's string.Template for substitution. Sources values from:
+    1. os.environ
+    2. hud.settings (loads from project .env and ~/.hud/.env)
+    3. Optional extra_mapping parameter
+    Uppercase aliases are automatically added for settings keys,
+    so both ${api_key} and ${API_KEY} work.
+    Missing variables resolve to empty strings.
+    Args:
+        obj: The object to resolve (string, dict, list, or other).
+        extra_mapping: Optional additional key-value pairs to include.
+    Returns:
+        The object with all ${VAR_NAME} placeholders resolved.
+    Example:
+        >>> resolve_env_vars({"key": "${MY_VAR}"})
+        {'key': 'resolved_value'}
+    """
+    # Build mapping from environment and settings
+    mapping: dict[str, Any] = dict(os.environ)
+    settings_dict = settings.model_dump()
+    mapping.update(settings_dict)
+    # Add UPPERCASE aliases for settings keys
+    for key, val in settings_dict.items():
+        with contextlib.suppress(Exception):
+            mapping[key.upper()] = val
+    if settings.api_key:
+        mapping["HUD_API_KEY"] = settings.api_key
+    if extra_mapping:
+        mapping.update(extra_mapping)
+    def substitute(value: Any) -> Any:
+        if isinstance(value, str):
+            safe_mapping = defaultdict(str, mapping)
+            return Template(value).substitute(safe_mapping)
+        elif isinstance(value, dict):
+            return {k: substitute(v) for k, v in value.items()}
+        elif isinstance(value, list):
+            return [substitute(item) for item in value]
+        return value
+    return substitute(obj)

hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl