hud-python 0.4.59__tar.gz → 0.4.60__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.4.59 → hud_python-0.4.60}/PKG-INFO +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/README.md +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/server/pyproject.toml +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/gemini.py +2 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/eval.py +21 -16
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/parallel.py +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/runner.py +4 -53
- hud_python-0.4.60/hud/datasets/tests/test_runner.py +67 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/context.py +16 -59
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/actor.py +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/__init__.py +14 -17
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/async_context.py +77 -85
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/job.py +8 -44
- hud_python-0.4.60/hud/telemetry/tests/test_async_context.py +515 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_job.py +0 -46
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/trace.py +5 -7
- hud_python-0.4.60/hud/telemetry/utils.py +42 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/group_eval.py +19 -11
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/version.py +1 -1
- {hud_python-0.4.59 → hud_python-0.4.60}/pyproject.toml +1 -1
- hud_python-0.4.59/hud/datasets/tests/test_runner.py +0 -106
- hud_python-0.4.59/hud/telemetry/tests/test_async_context.py +0 -242
- {hud_python-0.4.59 → hud_python-0.4.60}/.gitignore +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/LICENSE +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/environment/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/environment/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/server/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/blank/server/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/browser-base/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/2048/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/todo/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/environment/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/deepresearch/server/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/environment/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/rubrics/server/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/examples/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/__main__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/base.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/claude.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/lite_llm.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/openai.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/agents/utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/build.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/clone.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/debug.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/dev.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/get.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/init.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/pull.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/push.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/remove.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/celebrate.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/remote_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/viewer.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/rl/wait_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/config.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/base.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/comparator.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/collector.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/config.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/processors.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/tests/test_instrumentation.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/py.typed +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/README.md +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/buffer.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/config.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/learner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/tests/test_learner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/train.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/types.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/samples/browser.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/context.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/low_level.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/router.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/server.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/settings.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/hints.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/requests.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/base.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/bash.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/edit.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/response.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/submit.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/types.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/tools/utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/types.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/progress.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/task_tracking.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tasks.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_agent_factories.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.4.59 → hud_python-0.4.60}/hud/utils/tool_shorthand.py +0 -0
|
@@ -496,7 +496,7 @@ from hud.clients import MCPClient
|
|
|
496
496
|
|
|
497
497
|
async def main():
|
|
498
498
|
# `trace` captures *everything* that happens and sends it to hud.ai
|
|
499
|
-
with hud.
|
|
499
|
+
async with hud.async_trace("local_test"):
|
|
500
500
|
task = Task(
|
|
501
501
|
prompt="Complete the task",
|
|
502
502
|
mcp_config={
|
|
@@ -461,7 +461,8 @@ class GeminiAgent(MCPAgent):
|
|
|
461
461
|
def _remove_old_screenshots(self, messages: list[genai_types.Content]) -> None:
|
|
462
462
|
"""
|
|
463
463
|
Remove screenshots from old turns to manage context length.
|
|
464
|
-
Keeps only the last N turns with screenshots (configured via
|
|
464
|
+
Keeps only the last N turns with screenshots (configured via
|
|
465
|
+
self.max_recent_turn_with_screenshots).
|
|
465
466
|
"""
|
|
466
467
|
turn_with_screenshots_found = 0
|
|
467
468
|
|
|
@@ -260,9 +260,8 @@ async def run_single_task(
|
|
|
260
260
|
) -> None:
|
|
261
261
|
"""Load one task and execute it, or detect if JSON contains a list and run as dataset."""
|
|
262
262
|
|
|
263
|
-
# Provide early feedback to user
|
|
264
263
|
hud_console.info("🔧 Initializing evaluation...")
|
|
265
|
-
|
|
264
|
+
|
|
266
265
|
try:
|
|
267
266
|
from hud.utils.tasks import load_tasks
|
|
268
267
|
except ImportError as e:
|
|
@@ -399,23 +398,31 @@ async def run_single_task(
|
|
|
399
398
|
|
|
400
399
|
if group_size > 1:
|
|
401
400
|
hud_console.info(f"🔄 Running task with group_size={group_size}")
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
401
|
+
async with hud.async_job(
|
|
402
|
+
name=f"Group Eval: {task_prompt[:50]}... (x{group_size})",
|
|
403
|
+
metadata={
|
|
404
|
+
"task_id": getattr(task, "id", None),
|
|
405
|
+
"group_size": group_size,
|
|
406
|
+
"total_episodes": group_size,
|
|
407
|
+
},
|
|
408
|
+
) as job:
|
|
409
|
+
stats = await run_tasks_grouped(
|
|
410
|
+
tasks=[task],
|
|
411
|
+
agent_class=agent_class,
|
|
412
|
+
agent_config=agent_config,
|
|
413
|
+
group_size=group_size,
|
|
414
|
+
max_parallel_episodes=48,
|
|
415
|
+
max_steps=max_steps,
|
|
416
|
+
verbose=verbose,
|
|
417
|
+
job_id=job.id,
|
|
418
|
+
)
|
|
412
419
|
display_group_statistics(stats, show_details=True)
|
|
413
420
|
else:
|
|
414
421
|
# Enable agent step logging for single task mode
|
|
415
422
|
logging.getLogger("hud.agents").setLevel(logging.INFO)
|
|
416
423
|
logging.getLogger("hud.agents.base").setLevel(logging.INFO)
|
|
417
424
|
|
|
418
|
-
with hud.
|
|
425
|
+
async with hud.async_trace(name=task_prompt):
|
|
419
426
|
agent = build_agent(
|
|
420
427
|
agent_type,
|
|
421
428
|
model=model,
|
|
@@ -442,10 +449,8 @@ async def run_full_dataset(
|
|
|
442
449
|
) -> list[Any]:
|
|
443
450
|
"""Run evaluation across the entire dataset using asyncio-based concurrency."""
|
|
444
451
|
|
|
445
|
-
# Provide early feedback to user
|
|
446
452
|
hud_console.info("🔧 Initializing evaluation...")
|
|
447
453
|
|
|
448
|
-
# Import run_dataset lazily
|
|
449
454
|
try:
|
|
450
455
|
from hud.datasets import run_dataset
|
|
451
456
|
from hud.utils.tasks import load_tasks
|
|
@@ -627,7 +632,7 @@ async def run_full_dataset(
|
|
|
627
632
|
hud_console.info(f"🔄 Running dataset with group_size={group_size}")
|
|
628
633
|
|
|
629
634
|
# Run with job tracking
|
|
630
|
-
with hud.
|
|
635
|
+
async with hud.async_job(
|
|
631
636
|
name=f"Evaluation {dataset_name} (group_size={group_size})",
|
|
632
637
|
metadata={
|
|
633
638
|
"dataset": source,
|
|
@@ -371,7 +371,7 @@ async def run_dataset_parallel_manual(
|
|
|
371
371
|
logger.warning("Failed to extract dataset verification info")
|
|
372
372
|
|
|
373
373
|
# Create job context
|
|
374
|
-
with hud.
|
|
374
|
+
async with hud.async_job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
|
|
375
375
|
# Prepare agent class info for pickling
|
|
376
376
|
agent_module = agent_class.__module__
|
|
377
377
|
agent_name = agent_class.__name__
|
|
@@ -30,20 +30,14 @@ async def run_dataset(
|
|
|
30
30
|
) -> list[Any]:
|
|
31
31
|
"""Run all tasks in a dataset with automatic job and telemetry tracking.
|
|
32
32
|
|
|
33
|
-
This function handles concurrent task execution with proper telemetry collection.
|
|
34
|
-
All tasks are executed in parallel up to `max_concurrent`, with full telemetry
|
|
35
|
-
automatically uploaded to the HUD platform.
|
|
36
|
-
|
|
37
33
|
Args:
|
|
38
34
|
name: Name for the job
|
|
39
35
|
dataset: HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50"),
|
|
40
36
|
Dataset object, OR list of Task objects
|
|
41
37
|
agent_class: Agent class to instantiate (e.g., ClaudeAgent)
|
|
42
|
-
agent_config: Configuration
|
|
43
|
-
max_concurrent: Maximum
|
|
44
|
-
|
|
45
|
-
task complexity and available resources.
|
|
46
|
-
metadata: Optional metadata for the job
|
|
38
|
+
agent_config: Configuration kwargs for agent initialization
|
|
39
|
+
max_concurrent: Maximum concurrent tasks (recommended: 50-200)
|
|
40
|
+
metadata: Optional job metadata
|
|
47
41
|
max_steps: Maximum steps per task
|
|
48
42
|
split: Dataset split to use when loading from string (default: "train")
|
|
49
43
|
auto_respond: Whether to use auto-response agent
|
|
@@ -101,7 +95,6 @@ async def run_dataset(
|
|
|
101
95
|
except Exception:
|
|
102
96
|
logger.warning("Failed to extract dataset verification info")
|
|
103
97
|
|
|
104
|
-
# Use async job context manager for high-concurrency telemetry
|
|
105
98
|
async with hud.async_job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
|
|
106
99
|
# Run tasks with semaphore for concurrency control
|
|
107
100
|
sem = asyncio.Semaphore(max_concurrent)
|
|
@@ -112,12 +105,10 @@ async def run_dataset(
|
|
|
112
105
|
try:
|
|
113
106
|
# Create trace for this task
|
|
114
107
|
task_name = task_dict.get("prompt") or f"Task {index}"
|
|
115
|
-
|
|
116
|
-
# Ensure task_id is a string for baggage propagation
|
|
117
108
|
raw_task_id = task_dict.get("id")
|
|
118
109
|
safe_task_id = str(raw_task_id) if raw_task_id is not None else None
|
|
110
|
+
|
|
119
111
|
async with hud.async_trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
|
|
120
|
-
# with hud.trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
|
|
121
112
|
# Convert dict to Task here, at trace level
|
|
122
113
|
task = Task(**task_dict)
|
|
123
114
|
|
|
@@ -141,44 +132,4 @@ async def run_dataset(
|
|
|
141
132
|
if isinstance(result, Exception):
|
|
142
133
|
logger.error("Worker %s failed with exception: %s", i, result, exc_info=result)
|
|
143
134
|
|
|
144
|
-
# Ensure all telemetry is uploaded before returning
|
|
145
|
-
await _flush_telemetry()
|
|
146
|
-
|
|
147
135
|
return results
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
async def _flush_telemetry() -> None:
|
|
151
|
-
"""Flush all pending telemetry operations.
|
|
152
|
-
|
|
153
|
-
Ensures complete telemetry upload by:
|
|
154
|
-
1. Waiting for all async status updates to complete
|
|
155
|
-
2. Forcing OpenTelemetry span processor to export remaining spans
|
|
156
|
-
|
|
157
|
-
This prevents telemetry loss at high concurrency (200+ tasks) by ensuring
|
|
158
|
-
all operations complete before process exit.
|
|
159
|
-
"""
|
|
160
|
-
from hud.otel.config import is_telemetry_configured
|
|
161
|
-
from hud.utils import hud_console
|
|
162
|
-
from hud.utils.task_tracking import wait_all_tasks
|
|
163
|
-
|
|
164
|
-
hud_console.info("Uploading telemetry...")
|
|
165
|
-
|
|
166
|
-
# Step 1: Wait for async status updates (job/trace status)
|
|
167
|
-
completed_tasks = await wait_all_tasks(timeout_seconds=20.0)
|
|
168
|
-
if completed_tasks > 0:
|
|
169
|
-
hud_console.info(f"Completed {completed_tasks} pending telemetry tasks")
|
|
170
|
-
|
|
171
|
-
# Step 2: Flush OpenTelemetry span exports
|
|
172
|
-
if is_telemetry_configured():
|
|
173
|
-
try:
|
|
174
|
-
from opentelemetry import trace
|
|
175
|
-
from opentelemetry.sdk.trace import TracerProvider
|
|
176
|
-
|
|
177
|
-
provider = trace.get_tracer_provider()
|
|
178
|
-
if isinstance(provider, TracerProvider):
|
|
179
|
-
provider.force_flush(timeout_millis=20000)
|
|
180
|
-
logger.debug("OpenTelemetry spans flushed successfully")
|
|
181
|
-
except Exception as e:
|
|
182
|
-
logger.warning("Failed to flush OpenTelemetry: %s", e)
|
|
183
|
-
|
|
184
|
-
hud_console.info("Telemetry uploaded successfully")
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from hud.telemetry.utils import flush_telemetry
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.mark.asyncio
|
|
11
|
+
async def test_flush_telemetry():
|
|
12
|
+
"""Test flush_telemetry function."""
|
|
13
|
+
with (
|
|
14
|
+
patch("hud.otel.config.is_telemetry_configured", return_value=True),
|
|
15
|
+
patch("hud.utils.hud_console.hud_console"),
|
|
16
|
+
patch("opentelemetry.trace.get_tracer_provider") as mock_get_provider,
|
|
17
|
+
):
|
|
18
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
19
|
+
|
|
20
|
+
mock_provider = MagicMock(spec=TracerProvider)
|
|
21
|
+
mock_provider.force_flush.return_value = True
|
|
22
|
+
mock_get_provider.return_value = mock_provider
|
|
23
|
+
|
|
24
|
+
await flush_telemetry()
|
|
25
|
+
|
|
26
|
+
mock_provider.force_flush.assert_called_once_with(timeout_millis=5000)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.asyncio
|
|
30
|
+
async def test_flush_telemetry_not_configured():
|
|
31
|
+
"""Test flush_telemetry when telemetry is not configured."""
|
|
32
|
+
with patch("hud.otel.config.is_telemetry_configured", return_value=False):
|
|
33
|
+
await flush_telemetry()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pytest.mark.asyncio
|
|
37
|
+
async def test_flush_telemetry_exception():
|
|
38
|
+
"""Test flush_telemetry handles exceptions gracefully."""
|
|
39
|
+
with (
|
|
40
|
+
patch("hud.otel.config.is_telemetry_configured", return_value=True),
|
|
41
|
+
patch("hud.utils.hud_console.hud_console"),
|
|
42
|
+
patch("opentelemetry.trace.get_tracer_provider") as mock_get_provider,
|
|
43
|
+
):
|
|
44
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
45
|
+
|
|
46
|
+
mock_provider = MagicMock(spec=TracerProvider)
|
|
47
|
+
mock_provider.force_flush.side_effect = Exception("Flush failed")
|
|
48
|
+
mock_get_provider.return_value = mock_provider
|
|
49
|
+
|
|
50
|
+
await flush_telemetry()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@pytest.mark.asyncio
|
|
54
|
+
async def test_flush_telemetry_timeout():
|
|
55
|
+
"""Test flush_telemetry when force_flush times out."""
|
|
56
|
+
with (
|
|
57
|
+
patch("hud.otel.config.is_telemetry_configured", return_value=True),
|
|
58
|
+
patch("hud.utils.hud_console.hud_console"),
|
|
59
|
+
patch("opentelemetry.trace.get_tracer_provider") as mock_get_provider,
|
|
60
|
+
):
|
|
61
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
62
|
+
|
|
63
|
+
mock_provider = MagicMock(spec=TracerProvider)
|
|
64
|
+
mock_provider.force_flush.return_value = False
|
|
65
|
+
mock_get_provider.return_value = mock_provider
|
|
66
|
+
|
|
67
|
+
await flush_telemetry()
|
|
@@ -22,7 +22,6 @@ if TYPE_CHECKING:
|
|
|
22
22
|
|
|
23
23
|
from hud.settings import settings
|
|
24
24
|
from hud.shared import make_request, make_request_sync
|
|
25
|
-
from hud.utils.async_utils import fire_and_forget
|
|
26
25
|
|
|
27
26
|
logger = logging.getLogger(__name__)
|
|
28
27
|
|
|
@@ -301,32 +300,6 @@ async def _update_task_status_async(
|
|
|
301
300
|
logger.warning("Failed to update task status: %s", e)
|
|
302
301
|
|
|
303
302
|
|
|
304
|
-
def _fire_and_forget_status_update(
|
|
305
|
-
task_run_id: str,
|
|
306
|
-
status: str,
|
|
307
|
-
job_id: str | None = None,
|
|
308
|
-
error_message: str | None = None,
|
|
309
|
-
trace_name: str | None = None,
|
|
310
|
-
task_id: str | None = None,
|
|
311
|
-
group_id: str | None = None,
|
|
312
|
-
extra_metadata: dict[str, Any] | None = None,
|
|
313
|
-
) -> None:
|
|
314
|
-
"""Fire and forget status update - works in any context including Jupyter."""
|
|
315
|
-
fire_and_forget(
|
|
316
|
-
_update_task_status_async(
|
|
317
|
-
task_run_id,
|
|
318
|
-
status,
|
|
319
|
-
job_id,
|
|
320
|
-
error_message,
|
|
321
|
-
trace_name,
|
|
322
|
-
task_id,
|
|
323
|
-
group_id,
|
|
324
|
-
extra_metadata,
|
|
325
|
-
),
|
|
326
|
-
f"update task {task_run_id} status to {status}",
|
|
327
|
-
)
|
|
328
|
-
|
|
329
|
-
|
|
330
303
|
def _update_task_status_sync(
|
|
331
304
|
task_run_id: str,
|
|
332
305
|
status: str,
|
|
@@ -468,7 +441,7 @@ def _print_trace_complete_url(task_run_id: str, error_occurred: bool = False) ->
|
|
|
468
441
|
class trace:
|
|
469
442
|
"""Internal OpenTelemetry trace context manager.
|
|
470
443
|
|
|
471
|
-
This is the implementation
|
|
444
|
+
This is the sync implementation. For async code, use hud.async_trace() instead.
|
|
472
445
|
"""
|
|
473
446
|
|
|
474
447
|
def __init__(
|
|
@@ -532,9 +505,9 @@ class trace:
|
|
|
532
505
|
)
|
|
533
506
|
self._span = self._span_manager.__enter__()
|
|
534
507
|
|
|
535
|
-
# Update task status to running
|
|
508
|
+
# Update task status to running (sync call - blocking is expected)
|
|
536
509
|
if self.is_root and settings.telemetry_enabled and settings.api_key:
|
|
537
|
-
|
|
510
|
+
_update_task_status_sync(
|
|
538
511
|
self.task_run_id,
|
|
539
512
|
"running",
|
|
540
513
|
job_id=self.job_id,
|
|
@@ -542,7 +515,6 @@ class trace:
|
|
|
542
515
|
task_id=self.task_id,
|
|
543
516
|
group_id=self.group_id,
|
|
544
517
|
)
|
|
545
|
-
# Print the nice trace URL box (only if not part of a job)
|
|
546
518
|
if not self.job_id:
|
|
547
519
|
_print_trace_url(self.task_run_id)
|
|
548
520
|
|
|
@@ -556,35 +528,20 @@ class trace:
|
|
|
556
528
|
exc_tb: TracebackType | None,
|
|
557
529
|
) -> None:
|
|
558
530
|
"""Exit the trace context."""
|
|
559
|
-
# Update task status
|
|
531
|
+
# Update task status (sync call - blocking is expected for sync context manager)
|
|
560
532
|
if self.is_root and settings.telemetry_enabled and settings.api_key:
|
|
561
|
-
if exc_type
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
if not self.job_id:
|
|
574
|
-
_print_trace_complete_url(self.task_run_id, error_occurred=True)
|
|
575
|
-
else:
|
|
576
|
-
# Use fire-and-forget to avoid blocking the event loop
|
|
577
|
-
_fire_and_forget_status_update(
|
|
578
|
-
self.task_run_id,
|
|
579
|
-
"completed",
|
|
580
|
-
job_id=self.job_id,
|
|
581
|
-
trace_name=self.span_name,
|
|
582
|
-
task_id=self.task_id,
|
|
583
|
-
group_id=self.group_id,
|
|
584
|
-
)
|
|
585
|
-
# Print success completion message (only if not part of a job)
|
|
586
|
-
if not self.job_id:
|
|
587
|
-
_print_trace_complete_url(self.task_run_id, error_occurred=False)
|
|
533
|
+
status = "error" if exc_type else "completed"
|
|
534
|
+
_update_task_status_sync(
|
|
535
|
+
self.task_run_id,
|
|
536
|
+
status,
|
|
537
|
+
job_id=self.job_id,
|
|
538
|
+
error_message=str(exc_val) if exc_val else None,
|
|
539
|
+
trace_name=self.span_name,
|
|
540
|
+
task_id=self.task_id,
|
|
541
|
+
group_id=self.group_id,
|
|
542
|
+
)
|
|
543
|
+
if not self.job_id:
|
|
544
|
+
_print_trace_complete_url(self.task_run_id, error_occurred=bool(exc_type))
|
|
588
545
|
|
|
589
546
|
# End the span
|
|
590
547
|
if self._span and self._span_manager is not None:
|
|
@@ -109,7 +109,7 @@ class Actor:
|
|
|
109
109
|
|
|
110
110
|
# Run the task
|
|
111
111
|
try:
|
|
112
|
-
with hud.
|
|
112
|
+
async with hud.async_trace(f"Training | {task.prompt}", job_id=job_id):
|
|
113
113
|
result = await agent.run(task, max_steps=self.actor_config.max_steps_per_episode)
|
|
114
114
|
|
|
115
115
|
except Exception:
|
|
@@ -2,30 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
Provides telemetry APIs for tracking agent execution and experiments.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Async Usage (Recommended):
|
|
6
6
|
>>> import hud
|
|
7
|
-
>>> with hud.
|
|
8
|
-
...
|
|
7
|
+
>>> async with hud.async_trace("Task"):
|
|
8
|
+
... await agent.run(task)
|
|
9
|
+
>>> async with hud.async_job("Evaluation") as job:
|
|
10
|
+
... async with hud.async_trace("Task", job_id=job.id):
|
|
11
|
+
... await agent.run(task)
|
|
9
12
|
|
|
13
|
+
Sync Usage:
|
|
14
|
+
>>> import hud
|
|
15
|
+
>>> with hud.trace("Task"):
|
|
16
|
+
... do_work()
|
|
10
17
|
>>> with hud.job("My Job") as job:
|
|
11
18
|
... with hud.trace("Task", job_id=job.id):
|
|
12
19
|
... do_work()
|
|
13
20
|
|
|
14
|
-
High-Concurrency Usage (200+ parallel tasks):
|
|
15
|
-
>>> import hud
|
|
16
|
-
>>> async with hud.async_job("Evaluation") as job:
|
|
17
|
-
... async with hud.async_trace("Task", job_id=job.id):
|
|
18
|
-
... await do_async_work()
|
|
19
|
-
|
|
20
21
|
APIs:
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
-
|
|
24
|
-
-
|
|
25
|
-
|
|
26
|
-
Note:
|
|
27
|
-
Use async_trace/async_job only for high-concurrency scenarios (200+ tasks).
|
|
28
|
-
The run_dataset() function uses them automatically.
|
|
22
|
+
- async_trace(), async_job() - Async context managers (recommended)
|
|
23
|
+
- trace(), job() - Sync context managers
|
|
24
|
+
- flush_telemetry() - Manual span flushing (rarely needed)
|
|
25
|
+
- instrument() - Function instrumentation decorator
|
|
29
26
|
"""
|
|
30
27
|
|
|
31
28
|
from __future__ import annotations
|