PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/telemetry/tests/test_instrument.py ADDED Viewed

@@ -0,0 +1,401 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import pytest
+from hud.telemetry.instrument import _serialize_value, instrument
+def test_serialize_value_simple_types():
+    """Test _serialize_value with simple types."""
+    assert _serialize_value("string") == "string"
+    assert _serialize_value(42) == 42
+    assert _serialize_value(3.14) == 3.14
+    assert _serialize_value(True) is True
+    assert _serialize_value(None) is None
+def test_serialize_value_list():
+    """Test _serialize_value with lists."""
+    result = _serialize_value([1, 2, 3])
+    assert result == [1, 2, 3]
+def test_serialize_value_list_truncation():
+    """Test _serialize_value truncates long lists."""
+    long_list = list(range(20))
+    result = _serialize_value(long_list, max_items=5)
+    assert len(result) == 5
+    assert result == [0, 1, 2, 3, 4]
+def test_serialize_value_tuple():
+    """Test _serialize_value with tuples."""
+    result = _serialize_value((1, 2, 3))
+    assert result == [1, 2, 3]  # Converted to list by JSON
+def test_serialize_value_tuple_truncation():
+    """Test _serialize_value truncates long tuples."""
+    long_tuple = tuple(range(20))
+    result = _serialize_value(long_tuple, max_items=5)
+    assert len(result) == 5
+def test_serialize_value_dict():
+    """Test _serialize_value with dicts."""
+    result = _serialize_value({"key": "value"})
+    assert result == {"key": "value"}
+def test_serialize_value_dict_truncation():
+    """Test _serialize_value truncates large dicts."""
+    large_dict = {f"key{i}": i for i in range(20)}
+    result = _serialize_value(large_dict, max_items=5)
+    assert len(result) == 5
+def test_serialize_value_complex_object():
+    """Test _serialize_value with custom objects."""
+    @dataclass
+    class CustomObj:
+        name: str
+        value: int
+    obj = CustomObj(name="test", value=42)
+    result = _serialize_value(obj)
+    assert isinstance(result, dict)
+    assert result["name"] == "test"
+    assert result["value"] == 42
+def test_serialize_value_fallback():
+    """Test _serialize_value fallback for non-serializable objects."""
+    class WeirdObj:
+        def __init__(self):
+            raise Exception("Can't access")
+    obj = WeirdObj.__new__(WeirdObj)
+    result = _serialize_value(obj)
+    # The result is a string representation of the object
+    assert isinstance(result, str)
+    assert "WeirdObj" in result
+@pytest.mark.asyncio
+async def test_instrument_async_basic():
+    """Test instrument decorator on async function."""
+    @instrument
+    async def test_func(x: int, y: int) -> int:
+        return x + y
+    result = await test_func(2, 3)
+    assert result == 5
+@pytest.mark.asyncio
+async def test_instrument_async_with_params():
+    """Test instrument with custom parameters."""
+    @instrument(name="custom_name", category="custom_type")
+    async def test_func(x: int) -> int:
+        return x * 2
+    result = await test_func(5)
+    assert result == 10
+@pytest.mark.asyncio
+async def test_instrument_async_with_exception():
+    """Test instrument handles exceptions."""
+    @instrument
+    async def test_func():
+        raise ValueError("Test error")
+    with pytest.raises(ValueError, match="Test error"):
+        await test_func()
+@pytest.mark.asyncio
+async def test_instrument_async_no_record_args():
+    """Test instrument with record_args=False."""
+    @instrument(record_args=False)
+    async def test_func(x: int) -> int:
+        return x
+    result = await test_func(42)
+    assert result == 42
+@pytest.mark.asyncio
+async def test_instrument_async_no_record_result():
+    """Test instrument with record_result=False."""
+    @instrument(record_result=False)
+    async def test_func() -> str:
+        return "test"
+    result = await test_func()
+    assert result == "test"
+@pytest.mark.asyncio
+async def test_instrument_async_with_category():
+    """Test instrument with custom category."""
+    @instrument(category="agent")
+    async def test_func() -> int:
+        return 42
+    result = await test_func()
+    assert result == 42
+def test_instrument_sync_basic():
+    """Test instrument decorator on sync function."""
+    @instrument
+    def test_func(x: int, y: int) -> int:
+        return x + y
+    result = test_func(2, 3)
+    assert result == 5
+def test_instrument_sync_with_params():
+    """Test instrument on sync function with parameters."""
+    @instrument(name="sync_custom", category="sync_type")
+    def test_func(x: int) -> int:
+        return x * 2
+    result = test_func(5)
+    assert result == 10
+def test_instrument_sync_with_exception():
+    """Test instrument handles exceptions in sync functions."""
+    @instrument
+    def test_func():
+        raise ValueError("Sync error")
+    with pytest.raises(ValueError, match="Sync error"):
+        test_func()
+def test_instrument_sync_no_record_args():
+    """Test instrument sync with record_args=False."""
+    @instrument(record_args=False)
+    def test_func(x: int) -> int:
+        return x
+    result = test_func(42)
+    assert result == 42
+def test_instrument_sync_no_record_result():
+    """Test instrument sync with record_result=False."""
+    @instrument(record_result=False)
+    def test_func() -> str:
+        return "test"
+    result = test_func()
+    assert result == "test"
+def test_instrument_sync_with_category():
+    """Test instrument sync with custom category."""
+    @instrument(category="tool")
+    def test_func() -> int:
+        return 42
+    result = test_func()
+    assert result == 42
+def test_instrument_already_instrumented():
+    """Test that instrumenting already instrumented function is skipped."""
+    @instrument
+    def test_func():
+        return "original"
+    # Try to instrument again
+    test_func2 = instrument(test_func)
+    # Should be the same function
+    assert test_func2 is test_func
+def test_instrument_marks_as_instrumented():
+    """Test that instrument marks functions correctly."""
+    @instrument
+    def test_func():
+        return True
+    assert hasattr(test_func, "_hud_instrumented")
+    assert test_func._hud_instrumented is True
+    assert hasattr(test_func, "_hud_original")
+@pytest.mark.asyncio
+async def test_instrument_async_complex_result():
+    """Test instrument with complex result object."""
+    @instrument
+    async def test_func() -> dict:
+        return {"nested": {"data": [1, 2, 3]}, "count": 3}
+    result = await test_func()
+    assert result["count"] == 3
+def test_instrument_sync_complex_result():
+    """Test instrument sync with complex result."""
+    @dataclass
+    class Result:
+        value: int
+        name: str
+    @instrument
+    def test_func() -> Result:
+        return Result(value=42, name="test")
+    result = test_func()
+    assert result.value == 42
+@pytest.mark.asyncio
+async def test_instrument_async_with_self_param():
+    """Test instrument properly handles 'self' parameter."""
+    class TestClass:
+        @instrument
+        async def method(self, x: int) -> int:
+            return x * 2
+    obj = TestClass()
+    result = await obj.method(5)
+    assert result == 10
+def test_instrument_sync_with_cls_param():
+    """Test instrument properly handles 'cls' parameter."""
+    class TestClass:
+        @classmethod
+        @instrument
+        def method(cls, x: int) -> int:
+            return x * 3
+    result = TestClass.method(4)
+    assert result == 12
+@pytest.mark.asyncio
+async def test_instrument_async_serialization_error():
+    """Test instrument handles serialization errors gracefully."""
+    class UnserializableArg:
+        def __getattribute__(self, name):
+            raise Exception("Can't serialize")
+    @instrument
+    async def test_func(arg):
+        return "success"
+    # Should not raise, just skip serialization
+    result = await test_func(UnserializableArg())
+    assert result == "success"
+def test_instrument_function_without_signature():
+    """Test instrument on functions without inspectable signature."""
+    # Built-in functions don't have signatures
+    instrumented_len = instrument(len)
+    result = instrumented_len([1, 2, 3])
+    assert result == 3
+@pytest.mark.asyncio
+async def test_instrument_async_result_serialization_error():
+    """Test instrument handles result serialization errors."""
+    class UnserializableResult:
+        def __iter__(self):
+            raise Exception("Can't iterate")
+    @instrument
+    async def test_func():
+        return UnserializableResult()
+    # Should not raise, just skip result recording
+    result = await test_func()
+    assert isinstance(result, UnserializableResult)
+def test_instrument_without_parentheses():
+    """Test using @instrument without parentheses."""
+    @instrument
+    def test_func(x: int) -> int:
+        return x + 1
+    assert test_func(5) == 6
+def test_instrument_with_parentheses():
+    """Test using @instrument() with parentheses."""
+    @instrument()
+    def test_func(x: int) -> int:
+        return x + 1
+    assert test_func(5) == 6
+@pytest.mark.asyncio
+async def test_instrument_async_with_defaults():
+    """Test instrument with function that has default arguments."""
+    @instrument
+    async def test_func(x: int, y: int = 10) -> int:
+        return x + y
+    assert await test_func(5) == 15
+    assert await test_func(5, 20) == 25
+def test_instrument_sync_with_kwargs():
+    """Test instrument with keyword arguments."""
+    @instrument
+    def test_func(x: int, **kwargs) -> dict:
+        return {"x": x, **kwargs}
+    result = test_func(1, a=2, b=3)
+    assert result == {"x": 1, "a": 2, "b": 3}
+@pytest.mark.asyncio
+async def test_instrument_async_with_varargs():
+    """Test instrument with *args."""
+    @instrument
+    async def test_func(*args) -> int:
+        return sum(args)
+    result = await test_func(1, 2, 3, 4)
+    assert result == 10

hud/tools/__init__.py CHANGED Viewed

@@ -12,7 +12,13 @@ from .response import ResponseTool
 from .submit import SubmitTool
 if TYPE_CHECKING:
-    from .computer import AnthropicComputerTool, HudComputerTool, OpenAIComputerTool
+    from .computer import (
+        AnthropicComputerTool,
+        GeminiComputerTool,
+        HudComputerTool,
+        OpenAIComputerTool,
+        QwenComputerTool,
+    )
 __all__ = [
     "AnthropicComputerTool",
@@ -20,9 +26,11 @@ __all__ = [
     "BaseTool",
     "BashTool",
     "EditTool",
+    "GeminiComputerTool",
     "HudComputerTool",
     "OpenAIComputerTool",
     "PlaywrightTool",
+    "QwenComputerTool",
     "ResponseTool",
     "SubmitTool",
 ]
@@ -30,7 +38,13 @@ __all__ = [
 def __getattr__(name: str) -> Any:
     """Lazy import computer tools to avoid importing pyautogui unless needed."""
-    if name in ("AnthropicComputerTool", "HudComputerTool", "OpenAIComputerTool"):
+    if name in (
+        "AnthropicComputerTool",
+        "HudComputerTool",
+        "OpenAIComputerTool",
+        "GeminiComputerTool",
+        "QwenComputerTool",
+    ):
         from . import computer
         return getattr(computer, name)

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl