hud-python 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (58) hide show
  1. hud/__init__.py +4 -3
  2. hud/adapters/claude/adapter.py +5 -14
  3. hud/adapters/common/adapter.py +3 -3
  4. hud/adapters/common/tests/__init__.py +0 -0
  5. hud/adapters/common/tests/test_adapter.py +277 -0
  6. hud/adapters/common/types.py +3 -3
  7. hud/adapters/operator/adapter.py +16 -23
  8. hud/agent/__init__.py +8 -1
  9. hud/agent/base.py +28 -28
  10. hud/agent/claude.py +69 -60
  11. hud/agent/langchain.py +32 -26
  12. hud/agent/operator.py +75 -67
  13. hud/env/__init__.py +5 -5
  14. hud/env/client.py +2 -2
  15. hud/env/docker_client.py +37 -39
  16. hud/env/environment.py +91 -66
  17. hud/env/local_docker_client.py +5 -7
  18. hud/env/remote_client.py +39 -32
  19. hud/env/remote_docker_client.py +13 -3
  20. hud/evaluators/__init__.py +2 -3
  21. hud/evaluators/base.py +4 -3
  22. hud/evaluators/inspect.py +3 -8
  23. hud/evaluators/judge.py +34 -58
  24. hud/evaluators/match.py +42 -49
  25. hud/evaluators/remote.py +13 -26
  26. hud/evaluators/tests/__init__.py +0 -0
  27. hud/evaluators/tests/test_inspect.py +12 -0
  28. hud/evaluators/tests/test_judge.py +231 -0
  29. hud/evaluators/tests/test_match.py +115 -0
  30. hud/evaluators/tests/test_remote.py +98 -0
  31. hud/exceptions.py +167 -0
  32. hud/gym.py +9 -7
  33. hud/job.py +179 -109
  34. hud/server/__init__.py +2 -2
  35. hud/server/requests.py +148 -186
  36. hud/server/tests/__init__.py +0 -0
  37. hud/server/tests/test_requests.py +275 -0
  38. hud/settings.py +3 -2
  39. hud/task.py +9 -19
  40. hud/taskset.py +44 -11
  41. hud/trajectory.py +6 -9
  42. hud/types.py +12 -9
  43. hud/utils/__init__.py +2 -2
  44. hud/utils/common.py +36 -15
  45. hud/utils/config.py +45 -30
  46. hud/utils/progress.py +34 -21
  47. hud/utils/telemetry.py +10 -11
  48. hud/utils/tests/__init__.py +0 -0
  49. hud/utils/tests/test_common.py +52 -0
  50. hud/utils/tests/test_config.py +129 -0
  51. hud/utils/tests/test_progress.py +225 -0
  52. hud/utils/tests/test_telemetry.py +37 -0
  53. hud/utils/tests/test_version.py +8 -0
  54. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/METADATA +9 -6
  55. hud_python-0.2.4.dist-info/RECORD +62 -0
  56. hud_python-0.2.2.dist-info/RECORD +0 -46
  57. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/WHEEL +0 -0
  58. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+
5
+ from hud.evaluators.base import EvaluationResult
6
+ from hud.evaluators.remote import _remote_eval_call, remote_evaluate
7
+
8
+
9
@pytest.mark.asyncio
async def test_remote_eval_call_success(mocker):
    """A successful request is returned unchanged and is issued as the expected POST."""
    expected = {
        "score": 0.85,
        "reason": "Good match",
        "details": {"relevance": 0.9, "correctness": 0.8},
    }
    request_mock = mocker.patch("hud.evaluators.remote.make_request")
    request_mock.return_value = expected

    outcome = await _remote_eval_call(
        response="test response",
        answer="test answer",
        eval_type="match",
    )

    assert outcome == expected
    request_mock.assert_called_once()

    # The assertions inspect the keyword arguments of the single recorded call.
    sent = request_mock.call_args.kwargs
    assert sent["method"] == "POST"
    assert "evaluations/evaluate" in sent["url"]

    payload = sent["json"]
    assert payload["response"] == "test response"
    assert payload["answer"] == "test answer"
    assert payload["type"] == "match"
32
+
33
+
34
@pytest.mark.asyncio
async def test_remote_eval_call_with_config(mocker):
    """A caller-supplied config is forwarded verbatim in the request's JSON body."""
    expected = {"score": 0.75, "reason": "Good", "details": {}}
    request_mock = mocker.patch("hud.evaluators.remote.make_request")
    request_mock.return_value = expected
    config = {"threshold": 0.8, "strict": True}

    outcome = await _remote_eval_call(
        response="test response",
        answer="test answer",
        eval_type="judge",
        config=config,
    )

    assert outcome == expected
    request_mock.assert_called_once()
    # Only the keyword arguments of the recorded call are inspected.
    payload = request_mock.call_args.kwargs["json"]
    assert payload["config"] == config
50
+
51
+
52
@pytest.mark.asyncio
async def test_remote_eval_call_failure(mocker):
    """An exception from the request layer is reported as a -1.0 score, not raised."""
    failure = Exception("API error")
    mocker.patch("hud.evaluators.remote.make_request", side_effect=failure)

    outcome = await _remote_eval_call(
        response="test response",
        answer="test answer",
        eval_type="match",
    )

    assert outcome["score"] == -1.0
    # The reason carries both the generic prefix and the underlying error text.
    reason = outcome["reason"]
    assert "Remote evaluation failed" in reason
    assert "API error" in reason
    assert outcome["details"] == {}
64
+
65
+
66
def test_remote_evaluate(mocker):
    """The synchronous wrapper maps the remote payload onto an EvaluationResult."""
    payload = {"score": 0.9, "reason": "Excellent match", "details": {"similarity": 0.95}}

    async def fake_remote_call(*args, **kwargs):
        # Stand-in coroutine for the real remote call; ignores its arguments.
        return payload

    mocker.patch("hud.evaluators.remote._remote_eval_call", side_effect=fake_remote_call)

    evaluation = remote_evaluate(
        response="test response",
        answer="test answer",
        eval_type="custom_eval",
    )

    assert isinstance(evaluation, EvaluationResult)
    assert evaluation.score == 0.9
    assert evaluation.reason == "Excellent match"
    assert evaluation.mode == "custom_eval"
    assert evaluation.criteria_scores == {"similarity": 0.95}
83
+
84
+
85
def test_remote_evaluate_missing_fields(mocker):
    """Defaults are filled in when the remote payload omits reason and details."""
    payload = {"score": 0.8}  # Missing reason and details

    async def fake_remote_call(*args, **kwargs):
        # Stand-in coroutine for the real remote call; ignores its arguments.
        return payload

    mocker.patch("hud.evaluators.remote._remote_eval_call", side_effect=fake_remote_call)

    evaluation = remote_evaluate(response="test response", answer="test answer")

    assert evaluation.score == 0.8
    assert evaluation.reason == "Remote evaluation completed"
    assert evaluation.mode == "default"
    assert evaluation.criteria_scores == {}
hud/exceptions.py ADDED
@@ -0,0 +1,167 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ if TYPE_CHECKING:
7
+ import httpx
8
+ from typing_extensions import Self
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class HudException(Exception):
    """Base exception class for all HUD SDK errors.

    This is the parent class for all exceptions raised by the HUD SDK.
    Consumers should be able to catch this exception to handle any HUD-related error.
    """


class HudRequestError(HudException):
    """Any request to the HUD API can raise this exception.

    Derives from ``HudException`` so that ``except HudException`` also catches
    request failures, as the base class documents.

    Attributes:
        message: A human-readable error message.
        status_code: HTTP status code of the response, if one was received.
        response_text: Raw response body, if available.
        response_json: Parsed JSON response body, if available.
        response_headers: Response headers, if available.
    """

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        response_text: str | None = None,
        response_json: dict[str, Any] | None = None,
        response_headers: dict[str, str] | None = None,
    ) -> None:
        self.message = message
        self.status_code = status_code
        self.response_text = response_text
        self.response_json = response_json
        self.response_headers = response_headers
        super().__init__(message)

    def __str__(self) -> str:
        """Return the message followed by every populated detail, joined with `` | ``."""
        parts = [self.message]

        if self.status_code:
            parts.append(f"Status: {self.status_code}")

        if self.response_text:
            parts.append(f"Response Text: {self.response_text}")

        if self.response_json:
            parts.append(f"Response JSON: {self.response_json}")

        if self.response_headers:
            parts.append(f"Headers: {self.response_headers}")

        return " | ".join(parts)

    @classmethod
    def from_httpx_error(cls, error: httpx.HTTPStatusError, context: str = "") -> Self:
        """Create a HudRequestError from an HTTPx error response.

        The message is built from the ``detail`` field of a JSON object body when
        present; otherwise it reports the status code, and additionally embeds the
        JSON object when it has at most five keys. The failure is also logged at
        error level with the response text truncated to 500 characters.

        Args:
            error: The HTTPx status error carrying the failed response.
            context: Additional context to prefix to the error message.

        Returns:
            An instance of ``cls`` populated from the response.
        """
        response = error.response
        status_code = response.status_code
        response_text = response.text
        response_headers = dict(response.headers)

        # Try to get detailed error info from JSON if available
        response_json = None
        try:
            response_json = response.json()
        except Exception:
            # The body is not usable as JSON. Building the error must never raise
            # itself, so fall back to the status-only message.
            message = f"Request failed with status {status_code}"
        else:
            # Only a JSON object can carry a "detail" field; any other JSON value
            # (list, string, null, ...) gets the status-only message.
            is_object = isinstance(response_json, dict)
            detail = response_json.get("detail") if is_object else None
            if detail:
                message = f"Request failed: {detail}"
            else:
                message = f"Request failed with status {status_code}"
                if is_object and len(response_json) <= 5:
                    # A small object is cheap to show inline and usually explains the failure.
                    message += f" - JSON response: {response_json}"

        # Add context if provided
        if context:
            message = f"{context}: {message}"

        # Log the error details
        logger.error(
            "HTTP error from HUD SDK: %s | URL: %s | Status: %s | Response: %s%s",
            message,
            response.url,
            status_code,
            response_text[:500],
            "..." if len(response_text) > 500 else "",
        )
        return cls(
            message=message,
            status_code=status_code,
            response_text=response_text,
            response_json=response_json,
            response_headers=response_headers,
        )
108
+
109
+
110
class HudResponseError(HudException):
    """Signals an API response that arrived but cannot be used.

    Raised for responses that were delivered successfully (e.g. 200) yet whose
    data is invalid, lacks required fields, or otherwise cannot be processed.

    Attributes:
        message: A human-readable error message
        response_json: The invalid response data
    """

    def __init__(self, message: str, response_json: dict[str, Any] | None = None) -> None:
        super().__init__(message)
        self.message = message
        self.response_json = response_json

    def __str__(self) -> str:
        """Return the message, followed by the response data when there is any."""
        if not self.response_json:
            return self.message
        return f"{self.message} | Response: {self.response_json}"
136
+
137
+
138
class HudAuthenticationError(HudException):
    """Authentication with the HUD API failed.

    Covers an API key that is missing, invalid, or lacks the permissions
    needed for the requested operation.
    """
144
+
145
+
146
class HudRateLimitError(HudException):
    """The HUD API rate limit was exceeded.

    Covers the case where too many requests are sent within a short
    period of time.
    """
152
+
153
+
154
class HudTimeoutError(HudException):
    """A request to the HUD API timed out.

    Covers a request that runs longer than the configured timeout period.
    """
160
+
161
+
162
class HudNetworkError(HudException):
    """A network-level failure occurred.

    Covers problems with the network connection, DNS resolution, and other
    network-related issues.
    """
hud/gym.py CHANGED
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
 
18
18
  logger = logging.getLogger("hud.gym")
19
19
 
20
+
20
21
  async def make(
21
22
  env_src: Gym | Task,
22
23
  *,
@@ -26,7 +27,7 @@ async def make(
26
27
  ) -> Environment:
27
28
  """
28
29
  Create an environment from an environment ID or a Task object.
29
-
30
+
30
31
  Args:
31
32
  env_src: Environment ID or Task object
32
33
  job: Job object to associate with this environment
@@ -35,7 +36,7 @@ async def make(
35
36
  """
36
37
  if metadata is None:
37
38
  metadata = {}
38
-
39
+
39
40
  # Handle job parameter
40
41
  effective_job_id = None
41
42
  if job is not None:
@@ -45,13 +46,14 @@ async def make(
45
46
  else:
46
47
  # Try to get an active job from the decorator context
47
48
  try:
48
- from hud.job import get_active_job
49
- active_job = get_active_job()
49
+ import hud.job
50
+
51
+ active_job = hud.job.get_active_job()
50
52
  if active_job:
51
53
  effective_job_id = active_job.id
52
54
  except ImportError:
53
55
  pass # Module not available, skip
54
-
56
+
55
57
  gym = None
56
58
  task = None
57
59
  if isinstance(env_src, str | CustomGym):
@@ -77,7 +79,7 @@ async def make(
77
79
  )
78
80
  else:
79
81
  raise ValueError(f"Invalid environment location: {gym.location}")
80
-
82
+
81
83
  # Set up the environment with a source path
82
84
  if gym.controller_source_dir:
83
85
  logger.info("Setting source path")
@@ -101,7 +103,7 @@ async def make(
101
103
 
102
104
  # Create the environment itself
103
105
  environment = Environment(client=client, metadata=metadata, task=task, build_data=build_data)
104
-
106
+
105
107
  if task:
106
108
  await environment._setup()
107
109