hud-python 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

@@ -11,6 +11,7 @@ from anthropic.types.beta import (
11
11
  BetaImageBlockParam,
12
12
  )
13
13
 
14
+ from hud.adapters.common.types import CLA
14
15
  from hud.agent import Agent
15
16
  from hud.adapters import Adapter
16
17
  from hud.settings import settings
@@ -128,7 +129,7 @@ def extract_json_from_response(response: str) -> str:
128
129
  return response.strip()
129
130
 
130
131
 
131
- class ClaudePlaysPokemon(Agent[AsyncAnthropic, None]):
132
+ class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
132
133
  """AI agent that plays Pokémon games using Claude."""
133
134
 
134
135
  def __init__(
@@ -113,8 +113,8 @@ class RemoteDockerClient(DockerClient):
113
113
 
114
114
  logger.info("Creating remote environment")
115
115
 
116
- true_gym_id = await get_gym_id("local-docker")
117
- # true_gym_id = await get_gym_id("docker")
116
+ # true_gym_id = await get_gym_id("local-docker")
117
+ true_gym_id = await get_gym_id("docker")
118
118
 
119
119
  # augment metadata with dockerfile
120
120
  if "environment_config" not in metadata:
hud/job.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import asyncio
4
- import datetime
5
4
  import functools
6
5
  import inspect
7
6
  import logging
8
7
  import sys
9
8
  from collections.abc import Callable, Coroutine
9
+ from datetime import datetime
10
10
  from typing import TYPE_CHECKING, Any, TypeVar, cast
11
11
 
12
12
  from pydantic import BaseModel, PrivateAttr, TypeAdapter
@@ -44,7 +44,7 @@ class Job(BaseModel):
44
44
  id: str
45
45
  name: str
46
46
  metadata: dict[str, Any] | None = None
47
- created_at: datetime.datetime
47
+ created_at: datetime
48
48
  status: str
49
49
 
50
50
  # Internal cache for trajectories
@@ -164,13 +164,15 @@ async def create_job(
164
164
  # If not, we might need to make a subsequent GET request
165
165
  job_data = data # Adjust if the API response structure is different
166
166
 
167
+ created_at = datetime.fromisoformat(job_data["created_at"].replace("Z", "+00:00"))
168
+
167
169
  logger.info("View job at https://app.hud.so/jobs/%s.", job_data["id"])
168
170
 
169
171
  return Job(
170
172
  id=job_data["id"],
171
173
  name=job_data["name"],
172
174
  metadata=job_data.get("metadata", {}), # Ensure metadata is dict
173
- created_at=datetime.datetime.fromisoformat(job_data["created_at"]), # Parse datetime
175
+ created_at=created_at, # Parse datetime
174
176
  status=job_data["status"],
175
177
  )
176
178
 
@@ -379,7 +381,7 @@ async def _execute_task(
379
381
  "type": "step_error",
380
382
  "step": step + 1,
381
383
  "error": str(agent_step_err),
382
- "timestamp": datetime.datetime.now().isoformat(),
384
+ "timestamp": datetime.now().isoformat(),
383
385
  }
384
386
  )
385
387
  continue
@@ -413,7 +415,7 @@ async def _execute_task(
413
415
  "task_id": task_id,
414
416
  "type": "evaluation_error",
415
417
  "error": str(eval_err),
416
- "timestamp": datetime.datetime.now().isoformat(),
418
+ "timestamp": datetime.now().isoformat(),
417
419
  }
418
420
  )
419
421
 
@@ -427,7 +429,7 @@ async def _execute_task(
427
429
  "task_id": task_id,
428
430
  "type": "setup_error",
429
431
  "error": str(e),
430
- "timestamp": datetime.datetime.now().isoformat(),
432
+ "timestamp": datetime.now().isoformat(),
431
433
  }
432
434
  )
433
435
 
@@ -447,7 +449,7 @@ async def _execute_task(
447
449
  "task_id": task_id,
448
450
  "type": "env_close_error",
449
451
  "error": str(close_err),
450
- "timestamp": datetime.datetime.now().isoformat(),
452
+ "timestamp": datetime.now().isoformat(),
451
453
  }
452
454
  )
453
455
 
@@ -532,8 +534,6 @@ async def run_job(
532
534
  Returns:
533
535
  The created Job object with errors stored in job.errors.
534
536
  """
535
- hud_logger = logging.getLogger("hud")
536
- hud_logger.setLevel(logging.CRITICAL)
537
537
 
538
538
  tasks_to_run: list[Task] = []
539
539
  created_job: Job | None = None
hud/server/requests.py CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  import asyncio
8
8
  import logging
9
+ import ssl
9
10
  import time
10
11
  from typing import Any
11
12
 
@@ -20,7 +21,7 @@ from hud.exceptions import (
20
21
 
21
22
  # Set up logger
22
23
  logger = logging.getLogger("hud.http")
23
- logger.setLevel(logging.DEBUG)
24
+ logger.setLevel(logging.INFO)
24
25
 
25
26
 
26
27
  # Long running requests can take up to 10 minutes.
@@ -37,7 +38,7 @@ async def _handle_retry(
37
38
  ) -> None:
38
39
  """Helper function to handle retry logic and logging."""
39
40
  retry_time = retry_delay * (2 ** (attempt - 1)) # Exponential backoff
40
- logger.warning(
41
+ logger.debug(
41
42
  "%s from %s, retrying in %.2f seconds (attempt %d/%d)",
42
43
  error_msg,
43
44
  url,
@@ -140,6 +141,12 @@ async def make_request(
140
141
  continue
141
142
  else:
142
143
  raise HudNetworkError(f"Network error: {e!s}") from None
144
+ except ssl.SSLError as e:
145
+ if attempt <= max_retries:
146
+ await _handle_retry(attempt, max_retries, retry_delay, url, f"SSL error: {e}")
147
+ continue
148
+ else:
149
+ raise HudNetworkError(f"SSL error: {e!s}") from None
143
150
  except Exception as e:
144
151
  raise HudRequestError(f"Unexpected error: {e!s}") from None
145
152
  raise HudRequestError(f"Request failed after {max_retries} retries with unknown error")
@@ -201,7 +208,7 @@ def make_request_sync(
201
208
  # Check if we got a retriable status code
202
209
  if response.status_code in retry_status_codes and attempt <= max_retries:
203
210
  retry_time = retry_delay * (2 ** (attempt - 1)) # Exponential backoff
204
- logger.warning(
211
+ logger.debug(
205
212
  "Received status %d from %s, retrying in %.2f seconds (attempt %d/%d)",
206
213
  response.status_code,
207
214
  url,
@@ -222,7 +229,7 @@ def make_request_sync(
222
229
  except httpx.RequestError as e:
223
230
  if attempt <= max_retries:
224
231
  retry_time = retry_delay * (2 ** (attempt - 1))
225
- logger.warning(
232
+ logger.debug(
226
233
  "Network error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
227
234
  str(e),
228
235
  url,
@@ -234,6 +241,21 @@ def make_request_sync(
234
241
  continue
235
242
  else:
236
243
  raise HudNetworkError(f"Network error: {e!s}") from None
244
+ except ssl.SSLError as e:
245
+ if attempt <= max_retries:
246
+ retry_time = retry_delay * (2 ** (attempt - 1)) # Exponential backoff
247
+ logger.debug(
248
+ "SSL error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
249
+ str(e),
250
+ url,
251
+ retry_time,
252
+ attempt,
253
+ max_retries,
254
+ )
255
+ time.sleep(retry_time)
256
+ continue
257
+ else:
258
+ raise HudNetworkError(f"SSL error: {e!s}") from None
237
259
  except Exception as e:
238
260
  raise HudRequestError(f"Unexpected error: {e!s}") from None
239
261
  raise HudRequestError(f"Request failed after {max_retries} retries with unknown error")
hud/settings.py CHANGED
@@ -15,7 +15,7 @@ class Settings(BaseSettings):
15
15
  model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
16
16
 
17
17
  base_url: str = Field(
18
- default="https://orcstaging.hud.so/hud-gym/api",
18
+ default="https://orchestration.hud.so/hud-gym/api",
19
19
  description="Base URL for the HUD API",
20
20
  validation_alias="base_url",
21
21
  )
hud/taskset.py CHANGED
@@ -86,15 +86,27 @@ class TaskSet(BaseModel):
86
86
  # Convert all tasks to expanded configs
87
87
  processed_tasks = []
88
88
  for task in self.tasks:
89
- setup_config = create_remote_config(None, task.setup, REMOTE_SETUP)[0].args[0]
90
- evaluate_config = create_remote_config(None, task.evaluate, REMOTE_EVALUATE)[0].args[0]
89
+ if task.setup is not None:
90
+ setup_config = (
91
+ create_remote_config(None, task.setup, REMOTE_SETUP)[0].args[0].model_dump()
92
+ )
93
+ else:
94
+ setup_config = None
95
+ if task.evaluate is not None:
96
+ evaluate_config = (
97
+ create_remote_config(None, task.evaluate, REMOTE_EVALUATE)[0]
98
+ .args[0]
99
+ .model_dump()
100
+ )
101
+ else:
102
+ evaluate_config = None
91
103
 
92
104
  processed_tasks.append(
93
105
  {
94
106
  "prompt": task.prompt,
95
107
  "gym": task.gym,
96
- "setup": setup_config.model_dump(),
97
- "evaluate": evaluate_config.model_dump(),
108
+ "setup": setup_config,
109
+ "evaluate": evaluate_config,
98
110
  "config": task.config,
99
111
  }
100
112
  )
hud/telemetry/context.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import contextvars
4
4
  import logging
5
+ from collections import defaultdict
5
6
  from datetime import datetime
6
7
  from typing import Any, TypeVar
7
8
 
@@ -11,7 +12,6 @@ from hud.telemetry.mcp_models import (
11
12
  MCPNotificationCall,
12
13
  MCPRequestCall,
13
14
  MCPResponseCall,
14
- MCPTelemetryRecord,
15
15
  StatusType,
16
16
  )
17
17
 
@@ -21,9 +21,8 @@ logger = logging.getLogger("hud.telemetry")
21
21
  current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
22
22
  "current_task_run_id", default=None
23
23
  )
24
- mcp_calls_buffer: contextvars.ContextVar[list[BaseMCPCall] | None] = contextvars.ContextVar(
25
- "mcp_calls_buffer", default=None
26
- )
24
+ # NEW: Global dictionary for buffering, keyed by task_run_id
25
+ _GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
27
26
  is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
28
27
 
29
28
  # Maximum buffer size before automatic flush
@@ -35,51 +34,37 @@ T = TypeVar("T", bound=BaseMCPCall)
35
34
 
36
35
  def get_current_task_run_id() -> str | None:
37
36
  """Get the task_run_id for the current trace context."""
38
- value = current_task_run_id.get()
39
- # Convert empty string sentinel back to None
40
- return None if value == "" else value
37
+ return current_task_run_id.get()
41
38
 
42
39
 
43
40
  def set_current_task_run_id(task_run_id: str | None) -> None:
44
41
  """Set the task_run_id for the current trace context."""
45
- # Handle None value by using empty string as sentinel
46
- value_to_set = "" if task_run_id is None else task_run_id
47
- current_task_run_id.set(value_to_set)
42
+ current_task_run_id.set(task_run_id)
48
43
 
49
44
 
50
45
  def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
51
- """
52
- Add an MCP call to the buffer for the current trace.
53
-
54
- Args:
55
- record: Either a Pydantic model instance or dictionary with MCP call data
56
- """
57
- # Only buffer if we have an active trace
58
46
  task_run_id = get_current_task_run_id()
59
- if task_run_id is not None and task_run_id != "":
60
- buffer = mcp_calls_buffer.get()
61
- if buffer is None:
62
- buffer = []
63
47
 
64
- # Convert dictionary to proper model if needed
65
- if isinstance(record, dict):
66
- record = BaseMCPCall.from_dict(record)
48
+ if not task_run_id:
49
+ logger.warning(
50
+ "BUFFER_MCP_CALL: No task_run_id. Skipping buffer for %s", type(record).__name__
51
+ )
52
+ return
67
53
 
68
- # Ensure the record has the current task_run_id
69
- if record.task_run_id != task_run_id:
70
- # Create a copy with the current task_run_id
71
- record_dict = record.model_dump()
72
- record_dict["task_run_id"] = task_run_id
73
- record = BaseMCPCall.from_dict(record_dict)
54
+ # Ensure 'record' is a Pydantic model instance from here
55
+ if isinstance(record, dict):
56
+ try:
57
+ record_model = BaseMCPCall.from_dict(record)
58
+ record = record_model
59
+ except Exception as e_conv:
60
+ logger.exception("BUFFER_MCP_CALL: Failed to convert dict to BaseMCPCall: %s", e_conv)
61
+ return
74
62
 
75
- # Add to buffer
76
- buffer.append(record)
77
- mcp_calls_buffer.set(buffer)
63
+ _GLOBAL_MCP_CALL_BUFFERS[task_run_id].append(record)
64
+ buffer_len = len(_GLOBAL_MCP_CALL_BUFFERS[task_run_id])
78
65
 
79
- # Auto-flush if buffer gets too large
80
- if len(buffer) >= MAX_BUFFER_SIZE:
81
- logger.debug("MCP calls buffer reached size %d, auto-flushing", len(buffer))
82
- flush_buffer(export=True)
66
+ if buffer_len >= MAX_BUFFER_SIZE:
67
+ flush_buffer(export=True)
83
68
 
84
69
 
85
70
  def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
@@ -92,25 +77,16 @@ def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
92
77
  Returns:
93
78
  The list of buffered MCP calls
94
79
  """
95
- buffer = mcp_calls_buffer.get()
96
- if buffer is None:
97
- buffer = []
98
- # Reset buffer to empty list
99
- mcp_calls_buffer.set([])
100
-
101
- if export and buffer and len(buffer) > 0:
102
- task_id = buffer[0].task_run_id if buffer else None
103
- if task_id:
104
- logger.debug("Exporting %d MCP calls for task run %s", len(buffer), task_id)
105
- # Create a telemetry record for export
106
- _telemetry_record = MCPTelemetryRecord(task_run_id=task_id, records=buffer)
107
- # In the future, we could call an export function here
108
- # For now, just log that we have telemetry
109
- logger.debug("MCP telemetry record created with %d calls", len(buffer))
110
- else:
111
- logger.warning("No task_run_id found in buffer, skipping export")
112
-
113
- return buffer
80
+ task_run_id = get_current_task_run_id()
81
+ if not task_run_id:
82
+ logger.warning("FLUSH_BUFFER: No current task_run_id. Cannot flush.")
83
+ return []
84
+
85
+ buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(
86
+ task_run_id, []
87
+ ) # Get and remove the list for this task
88
+
89
+ return buffer_for_task # Return the flushed items
114
90
 
115
91
 
116
92
  def create_request_record(
@@ -150,6 +126,7 @@ def create_response_record(
150
126
  is_error=is_error,
151
127
  **kwargs,
152
128
  )
129
+
153
130
  buffer_mcp_call(record)
154
131
  return record
155
132
 
@@ -189,5 +166,4 @@ def create_manual_test_record(**custom_data: Any) -> MCPManualTestCall | None:
189
166
  def reset_context() -> None:
190
167
  """Reset all telemetry context variables. Useful for test isolation."""
191
168
  set_current_task_run_id(None)
192
- mcp_calls_buffer.set([])
193
169
  is_root_trace.set(False)
@@ -31,9 +31,6 @@ from hud.telemetry.mcp_models import DirectionType, MCPCallType, MCPManualTestCa
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
34
- # Ensure no OTel imports remain
35
- # from opentelemetry import context as otel_context, propagate # Should be removed
36
-
37
34
 
38
35
  class MCPInstrumentor:
39
36
  """
@@ -140,9 +140,13 @@ class TestMCPCallBuffer:
140
140
 
141
141
  # Flush should return all calls from both tasks
142
142
  result = flush_buffer()
143
- assert len(result) == 2
144
- assert result[0] == mock_call_1
145
- assert result[1] == mock_call_2
143
+ assert len(result) == 1
144
+ assert result[0] == mock_call_2
145
+
146
+ set_current_task_run_id("task-1")
147
+ result2 = flush_buffer()
148
+ assert len(result2) == 1
149
+ assert result2[0] == mock_call_1
146
150
 
147
151
  def test_buffer_mcp_call_without_task_id(self):
148
152
  """Test adding MCP call when no task run ID is set."""
hud/types.py CHANGED
@@ -48,7 +48,7 @@ class EnvironmentStatus(str, enum.Enum):
48
48
 
49
49
 
50
50
  # Available HUD gyms
51
- ServerGym: TypeAlias = Literal["qa", "hud-browser", "hud-ubuntu", "OSWorld-Ubuntu"]
51
+ ServerGym: TypeAlias = Literal["qa", "hud-browser", "OSWorld-Ubuntu"]
52
52
 
53
53
  # Gyms can be either custom or server-side
54
54
  Gym: TypeAlias = CustomGym | ServerGym
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.2.5"
8
+ assert hud.__version__ == "0.2.6"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.2.5"
7
+ __version__ = "0.2.6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-sdk/issues
@@ -74,17 +74,17 @@ Description-Content-Type: text/markdown
74
74
  </div>
75
75
 
76
76
  <h3>
77
- Create, evaluate, and improve AI agents across web browsers, desktop environments, and custom scenarios.
77
+ Evaluate your Computer Use AI agents across web browsers, desktop environments, and custom scenarios.
78
78
  </h3>
79
79
 
80
- > ### 🚀 Are you a startup building agents?
81
- >
82
- > [📅 Hop on a call ](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.so](mailto:founders@hud.so)
83
- >
84
- > We're here to help with eval strategies, custom environments, or improving your agent architecture!
80
+ ### 🚀 Are you a startup building agents?
85
81
 
82
+ [📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.so](mailto:founders@hud.so)
86
83
 
87
- > **Early Release Notice**: This SDK is currently in early release status. The API is evolving and may change in future releases as we gather feedback and improve functionality.
84
+ We're here to help with eval strategies, custom environments, or improving your agent architecture!
85
+
86
+
87
+ > **Early Release Notice**: We'd love to hear your feedback in [Issues](https://github.com/hud-evals/hud-sdk/issues), as the SDK is still evolving!
88
88
 
89
89
  [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
90
90
 
@@ -132,23 +132,23 @@ with hud.trace("my-agent-run"):
132
132
  result = await agent.run(task)
133
133
  ```
134
134
 
135
- ## API Key Setup
136
-
137
- Before getting started, you'll need to obtain an API key:
135
+ ## Quick Start
138
136
 
139
- 1. Visit [app.hud.so](https://app.hud.so) to create a free account and generate your API key
140
- 2. Set it in your environment or .env file:
137
+ ### Installation
141
138
 
142
139
  ```bash
143
- export HUD_API_KEY=your_api_key_here
140
+ pip install hud-python
144
141
  ```
145
142
 
146
- ## Quick Start
143
+ ### API Key Setup
147
144
 
148
- ### Installation
145
+ Before getting started, you'll need to obtain an API key:
146
+
147
+ 1. Visit [app.hud.so](https://app.hud.so) to create a free account and generate your API key
148
+ 2. Set it in your environment or .env file:
149
149
 
150
150
  ```bash
151
- pip install hud-python
151
+ export HUD_API_KEY=your_api_key_here
152
152
  ```
153
153
 
154
154
  ### Simple Browser Example with Claude Computer Use
@@ -269,4 +269,4 @@ If you use this SDK in your research, please cite it as follows:
269
269
  url = {https://github.com/hud-evals/hud-sdk},
270
270
  langid = {en}
271
271
  }
272
- ```
272
+ ```
@@ -1,13 +1,13 @@
1
1
  hud/__init__.py,sha256=6PlxwtjYyaqk6UAyHLJZhsiHRlgndH-Jja9f9BtInUY,1063
2
2
  hud/exceptions.py,sha256=pifKvSqxj9_g4NfARVyH5a-lTThhi9XW06tIXaBakQw,5526
3
3
  hud/gym.py,sha256=Dl7nur2QTxoVNAcWIvFjuGAbKmoc7CVgjV5gWd35usU,4544
4
- hud/job.py,sha256=uSF5o5WTgk8d0UxpQ6YlNcw7fXDUaN8Lpu0RL6xbHNc,25188
5
- hud/settings.py,sha256=B3uBiZMhJzsy4Wd41bioN31CQRNfyBN52hLQnkLaJPU,1507
4
+ hud/job.py,sha256=bd88L83L3uqdXE7B3Bjsk8hGk95OggJiLjItFsZXDoQ,25116
5
+ hud/settings.py,sha256=3zALwVbPTaDc01-dR_-rGsrDfc-ieMIcmO5avv6S2Y0,1510
6
6
  hud/task.py,sha256=AMmJLYl3BjX8TfBY4ZuR_QIXhTkWDX-4C_Pbi3HziVg,5505
7
- hud/taskset.py,sha256=WvmwlqnbPbZshZXAfl9qSFeV89k5gHJE4rvluvgz5hk,4581
7
+ hud/taskset.py,sha256=ou2Ivulv392txtDvXDpvJAgHj_4h2LZNBaxpeC_mRnw,4903
8
8
  hud/trajectory.py,sha256=OrcRbxK_ejFp1VhJCjZnM1WCmCXxEOK4CxNjCngcsjo,3721
9
- hud/types.py,sha256=kk93J7DJIL574vZIKhx3Ka1wRxQ3ZAXOaYhdecufuUo,1822
10
- hud/version.py,sha256=sQ1p2i655QksGNtiO7-7ybulzQTw0yiFR_QM9mRm5hk,104
9
+ hud/types.py,sha256=O8eotDLw4onwRreX9xLPZ2T11qf8wwUidaqGI7jvagY,1808
10
+ hud/version.py,sha256=4c7HS3iYXZGe4vuaPb8pgYwVInf3F3G95h0PITpm6aw,104
11
11
  hud/adapters/__init__.py,sha256=zz24KdC_e9TJPgWo6y57_8SzevEE5ak4Cm6tXzMxwRk,266
12
12
  hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
13
13
  hud/adapters/claude/adapter.py,sha256=_qUD0iu0_Y_8yuhrsZw2E5wNv8RB-Aa7BqclAmNHdtI,6096
@@ -25,7 +25,7 @@ hud/adapters/operator/tests/test_adapter.py,sha256=4RAXwyxAtkh-1Mlt1zJayRkcv3LWa
25
25
  hud/agent/__init__.py,sha256=_OxMG3UW1vXSuixdpo09b1jexfWcUbfK44zto8t6_LE,453
26
26
  hud/agent/base.py,sha256=d7eMoRPepVSCFUyU1oV1hGvyff8rsPtXDelVcJlEF7Y,4022
27
27
  hud/agent/claude.py,sha256=bXgdzlZHaIHaxrGIYt6w1kEh5oHFDT5P2u304swi8wU,7529
28
- hud/agent/claude_plays_pokemon.py,sha256=cZ_hlYGux16Ucjvj_DOnFgGvu9q93PWriPIQPrifxiY,9968
28
+ hud/agent/claude_plays_pokemon.py,sha256=H1AIXb6qNE-45G5CdBgsDGTMXOgjg44wGtySgeKgeq4,10009
29
29
  hud/agent/langchain.py,sha256=iuMpu-k55Qic0LCzfOR0Wa5kDhIv7zHCc8et3axypus,8833
30
30
  hud/agent/operator.py,sha256=Bji_v6NB-hUyTe1otdpEc3Hb0ZpyQbuL_iZu5irYFe4,8612
31
31
  hud/agent/misc/__init__.py,sha256=-ftYH1T5r7fXKKra6d8jXYmUz9KOTmYwBrPJU-V3S7g,71
@@ -38,7 +38,7 @@ hud/env/docker_client.py,sha256=-nQLGeRl9GLJKEku1eIBL3RQuteIPLrecjVHxzSD_vU,1046
38
38
  hud/env/environment.py,sha256=hSJh5KaiGuWw4IEV1IHNDVjOxyoyOxhHkavGj_5RBbQ,15154
39
39
  hud/env/local_docker_client.py,sha256=bcgmmRJGX1--bf0-5Zlk59l2W0PvABnDn4FWZKvXjjk,9565
40
40
  hud/env/remote_client.py,sha256=gjCzcuotvDC7GraVBBT2Tix5GKpntwtldv5PqnXx8wk,6109
41
- hud/env/remote_docker_client.py,sha256=khdAaNfwdiG6pl6XKGHI6pFqMSPBn0brQPZ-09XhZ-c,9484
41
+ hud/env/remote_docker_client.py,sha256=ZLqbd6IeU9BDndjwanmJN3_1CEsrCkntumGavLiPi88,9484
42
42
  hud/evaluators/__init__.py,sha256=V5nktEAw3EDn2Y537pjia5Y1IjdLBIPrDjTs6YTCdX4,153
43
43
  hud/evaluators/base.py,sha256=ALO9Rj-R_9HtHIHYp84bsQQD12De0XnCTwad78_T5-k,771
44
44
  hud/evaluators/inspect.py,sha256=ZvrTXLpgibyvQ5aNXAMP4quyXISrRQHg9besDcuCx7U,692
@@ -51,19 +51,19 @@ hud/evaluators/tests/test_judge.py,sha256=c1GaAeq_WpBVgBlx-gQncHrOPokzKNxlbgiC8W
51
51
  hud/evaluators/tests/test_match.py,sha256=C04GoluyT9i41YZ65xEjN7tKHQbENbrpNhNtUd4ivmA,3919
52
52
  hud/evaluators/tests/test_remote.py,sha256=YdJpyyuRLkYP0e3jTUkD3zobS2WHQPePn8yBZtYOIN4,3243
53
53
  hud/server/__init__.py,sha256=IPxPCqtPLguryN-nBq78Sakypw2bRiE2iHv3SXG8YRk,139
54
- hud/server/requests.py,sha256=U0WEeY8AzsLMSICebxQyF0FEbXpKieh-3Qnisd0C6a4,7881
54
+ hud/server/requests.py,sha256=AnFW4ELojjvfF6xjS2no6_fg4Rph2aR2hjPzYTede0Q,8841
55
55
  hud/server/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  hud/server/tests/test_requests.py,sha256=63YCbykcib5MxKxm-OgHJPLX3QC7hmgIwnWaYukVM6s,9077
57
57
  hud/telemetry/__init__.py,sha256=ky48kuZD3Bt0vOf9FwZwkV_ka7O26Tvcxh7p1lMpsMk,582
58
58
  hud/telemetry/_trace.py,sha256=W7S6CxwtmjNl4OZbA1SQHXsaNm072J9c-fjPjQomgOY,5135
59
- hud/telemetry/context.py,sha256=pFwst_TLo6izM_k2h--y_qg7uZovdJ36GC7aeaZHMiE,6309
59
+ hud/telemetry/context.py,sha256=PNbfrMgjeRTTg0nUKXYCflqn71I_cSjU8LXdvouUfc4,5209
60
60
  hud/telemetry/exporter.py,sha256=l-r7mADcHpn6i9hhB407hx3HS4khfbhuwX0txJ2X0VQ,17986
61
61
  hud/telemetry/mcp_models.py,sha256=YIArMtCVfC4NVvaEmUYs_kxDs0GQ-xtFFmB8jEGKaag,11342
62
62
  hud/telemetry/instrumentation/__init__.py,sha256=vHmSqaJMMehgRNn6EN2SMoYDD12rSHkLeVmj7Uy1my0,88
63
- hud/telemetry/instrumentation/mcp.py,sha256=ugkcP0V0UNO6Zy4zueF1jPr4jdqLXF81H8HNwHhcDzI,22041
63
+ hud/telemetry/instrumentation/mcp.py,sha256=xGAMdhTgM1ixHiDX7xkS9Ax1NCjK3u7pLWIbIh8WZIA,21925
64
64
  hud/telemetry/instrumentation/registry.py,sha256=UVaSsEA693lvKYd5R3n3ve6GcAB1fwqubRwIVeZiNmo,1821
65
65
  hud/telemetry/tests/__init__.py,sha256=QMN8OzfrBUDbQESwrwHCqXLdDwCjYWX8BJcpeLUJfqA,33
66
- hud/telemetry/tests/test_context.py,sha256=ZQ8ubuTRQoiMq-L3Zl-Ucy8Ma1a--z1gdFoAnFO3m3Q,6421
66
+ hud/telemetry/tests/test_context.py,sha256=BGRDlXXC_VbpD4cYl_o9gRQDDKb2ox1das_ZuX14NC8,6531
67
67
  hud/telemetry/tests/test_trace.py,sha256=JzmjNRtHdQFPqLm7hOPastENg-hMJo9p8bbxJ77iXyc,10687
68
68
  hud/utils/__init__.py,sha256=oSl_gGoS272X2VFnBYX8hLxcP2xgGoBYQXAuLhtQgw8,260
69
69
  hud/utils/common.py,sha256=R83ntEtKr8KXG1mKcy0I_OllFHkDrPMysPhW12uBckc,3926
@@ -76,9 +76,9 @@ hud/utils/tests/test_common.py,sha256=gbYpQKBNdbCcEH0v1UZpxLt_NW2T5sETMIJKvy8S8p
76
76
  hud/utils/tests/test_config.py,sha256=dPlXYWuMrxX-NOYbf0vdJ27TJpfacKG8eiKOSGOcfDU,4079
77
77
  hud/utils/tests/test_progress.py,sha256=QunwDgi_heQXhDgmC25zgjr-sFUu5FdJ_1aYigMKeIc,6351
78
78
  hud/utils/tests/test_telemetry.py,sha256=t0An1RTBaE0dZVEpF4uwuq5k1R-PXFR5k4u71h60tx8,1224
79
- hud/utils/tests/test_version.py,sha256=CPQuXvuX4wOLxEvIKxaaEOLjxVCx_WjCv0XyokJM3X8,159
79
+ hud/utils/tests/test_version.py,sha256=wDkcJbOW78FEZpIGKXST347GOm3o_NBVONZ3RhkYIM4,159
80
80
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
- hud_python-0.2.5.dist-info/METADATA,sha256=s6u-MGNcVTbnhFqnzdodctjywOzGx7OMzAPMivBfeaI,9511
82
- hud_python-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
83
- hud_python-0.2.5.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
84
- hud_python-0.2.5.dist-info/RECORD,,
81
+ hud_python-0.2.6.dist-info/METADATA,sha256=xXaqxhBWDKs-vkGiCi19m4wBSbbuehk4fEd1zP5Ufbg,9469
82
+ hud_python-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
83
+ hud_python-0.2.6.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
84
+ hud_python-0.2.6.dist-info/RECORD,,