hud-python 0.4.12__py3-none-any.whl → 0.4.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/cli/__init__.py CHANGED
@@ -348,6 +348,11 @@ def dev(
348
348
  ),
349
349
  port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
350
350
  no_reload: bool = typer.Option(False, "--no-reload", help="Disable hot-reload"),
351
+ full_reload: bool = typer.Option(
352
+ False,
353
+ "--full-reload",
354
+ help="Restart entire container on file changes (instead of just server process)",
355
+ ),
351
356
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Show server logs"),
352
357
  inspector: bool = typer.Option(
353
358
  False, "--inspector", help="Launch MCP Inspector (HTTP mode only)"
@@ -375,12 +380,13 @@ def dev(
375
380
  hud dev . --inspector # Launch MCP Inspector (HTTP mode only)
376
381
  hud dev . --interactive # Launch interactive testing mode (HTTP mode only)
377
382
  hud dev . --no-logs # Disable Docker log streaming
383
+ hud dev . --full-reload # Restart entire container on file changes (instead of just server)
378
384
 
379
385
  # With Docker arguments (after all options):
380
386
  hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
381
387
  hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
382
388
  hud dev . --build -e DEBUG=true --memory 2g
383
- """
389
+ """ # noqa: E501
384
390
  # Parse directory and Docker arguments
385
391
  if params:
386
392
  directory = params[0]
@@ -397,6 +403,7 @@ def dev(
397
403
  transport,
398
404
  port,
399
405
  no_reload,
406
+ full_reload,
400
407
  verbose,
401
408
  inspector,
402
409
  no_logs,
hud/cli/dev.py CHANGED
@@ -35,6 +35,7 @@ def create_proxy_server(
35
35
  directory: str | Path,
36
36
  image_name: str,
37
37
  no_reload: bool = False,
38
+ full_reload: bool = False,
38
39
  verbose: bool = False,
39
40
  docker_args: list[str] | None = None,
40
41
  interactive: bool = False,
@@ -48,8 +49,12 @@ def create_proxy_server(
48
49
  design.warning(f"Could not extract CMD from {image_name}, using default")
49
50
  original_cmd = ["python", "-m", "hud_controller.server"]
50
51
 
51
- # Generate container name from image
52
- container_name = f"{image_name.replace(':', '-').replace('/', '-')}"
52
+ # Generate unique container name from image to avoid conflicts between multiple instances
53
+ import os
54
+
55
+ pid = str(os.getpid())[-6:] # Last 6 digits of process ID for uniqueness
56
+ base_name = image_name.replace(":", "-").replace("/", "-")
57
+ container_name = f"{base_name}-{pid}"
53
58
 
54
59
  # Build the docker run command
55
60
  docker_cmd = [
@@ -73,14 +78,20 @@ def create_proxy_server(
73
78
  if interactive:
74
79
  no_reload = True
75
80
 
76
- if not no_reload:
77
- # Inject our supervisor into the CMD
81
+ # Validate reload options
82
+ if no_reload and full_reload:
83
+ design.warning("Cannot use --full-reload with --no-reload, ignoring --full-reload")
84
+ full_reload = False
85
+
86
+ if not no_reload and not full_reload:
87
+ # Standard hot-reload: inject supervisor for server restart within container
78
88
  modified_cmd = inject_supervisor(original_cmd)
79
89
  docker_cmd.extend(["--entrypoint", modified_cmd[0]])
80
90
  docker_cmd.append(image_name)
81
91
  docker_cmd.extend(modified_cmd[1:])
82
92
  else:
83
- # No reload - use original CMD
93
+ # No reload or full reload: use original CMD without supervisor
94
+ # Note: Full reload logic (container restart) would be implemented here in the future
84
95
  docker_cmd.append(image_name)
85
96
 
86
97
  # Create configuration following MCPConfig schema
@@ -96,9 +107,14 @@ def create_proxy_server(
96
107
 
97
108
  # Debug output - only if verbose
98
109
  if verbose:
99
- if not no_reload:
110
+ if not no_reload and not full_reload:
111
+ design.info("Mode: Hot-reload (server restart within container)")
100
112
  design.info("Watching: /app/src for changes")
113
+ elif full_reload:
114
+ design.info("Mode: Full reload (container restart on file changes)")
115
+ design.info("Note: Full container restart not yet implemented, using no-reload mode")
101
116
  else:
117
+ design.info("Mode: No reload")
102
118
  design.info("Container will run without hot-reload")
103
119
  design.command_example(f"docker logs -f {container_name}", "View container logs")
104
120
 
@@ -127,6 +143,7 @@ async def start_mcp_proxy(
127
143
  transport: str,
128
144
  port: int,
129
145
  no_reload: bool = False,
146
+ full_reload: bool = False,
130
147
  verbose: bool = False,
131
148
  inspector: bool = False,
132
149
  no_logs: bool = False,
@@ -212,8 +229,12 @@ async def start_mcp_proxy(
212
229
  design.error(f"Source directory not found: {src_path}")
213
230
  raise click.Abort
214
231
 
215
- # Extract container name from the proxy configuration
216
- container_name = f"{image_name.replace(':', '-').replace('/', '-')}"
232
+ # Extract container name from the proxy configuration (must match create_proxy_server naming)
233
+ import os
234
+
235
+ pid = str(os.getpid())[-6:] # Last 6 digits of process ID for uniqueness
236
+ base_name = image_name.replace(":", "-").replace("/", "-")
237
+ container_name = f"{base_name}-{pid}"
217
238
 
218
239
  # Remove any existing container with the same name (silently)
219
240
  # Note: The proxy creates containers on-demand when clients connect
@@ -347,6 +368,7 @@ async def start_mcp_proxy(
347
368
  # Always show waiting message
348
369
  log_design.info("") # Empty line for spacing
349
370
  log_design.progress_message("⏳ Waiting for first client connection to start container...")
371
+ log_design.info(f"📋 Looking for container: {container_name}") # noqa: G004
350
372
 
351
373
  # Keep trying to stream logs - container is created on demand
352
374
  has_shown_started = False
@@ -397,7 +419,8 @@ async def start_mcp_proxy(
397
419
 
398
420
  # Show all logs with gold formatting like hud debug
399
421
  # Format all logs in gold/dim style like hud debug's stderr
400
- log_design.console.print(
422
+ # Use stdout console to avoid stderr redirection when not verbose
423
+ log_design._stdout_console.print(
401
424
  f"[rgb(192,150,12)]■[/rgb(192,150,12)] {decoded_line}", highlight=False
402
425
  )
403
426
 
@@ -408,16 +431,19 @@ async def start_mcp_proxy(
408
431
  await asyncio.sleep(1)
409
432
  continue # Loop back to check if container exists
410
433
 
411
- except Exception:
412
- # Some unexpected error
434
+ except Exception as e:
435
+ # Some unexpected error - show it so we can debug
436
+ log_design.warning(f"Failed to stream Docker logs: {e}") # noqa: G004
413
437
  if verbose:
414
- log_design.warning("Failed to stream logs")
438
+ import traceback
439
+
440
+ log_design.warning(f"Traceback: {traceback.format_exc()}") # noqa: G004
415
441
  await asyncio.sleep(1)
416
442
 
417
443
  # CRITICAL: Create proxy AFTER all logging setup to prevent it from resetting logging config
418
444
  # This is important because FastMCP might initialize loggers during creation
419
445
  proxy = create_proxy_server(
420
- directory, image_name, no_reload, verbose, docker_args or [], interactive
446
+ directory, image_name, no_reload, full_reload, verbose, docker_args or [], interactive
421
447
  )
422
448
 
423
449
  # One more attempt to suppress the FastMCP server log
@@ -548,6 +574,7 @@ def run_mcp_dev_server(
548
574
  transport: str = "http",
549
575
  port: int = 8765,
550
576
  no_reload: bool = False,
577
+ full_reload: bool = False,
551
578
  verbose: bool = False,
552
579
  inspector: bool = False,
553
580
  no_logs: bool = False,
@@ -706,6 +733,7 @@ def run_mcp_dev_server(
706
733
  transport,
707
734
  port,
708
735
  no_reload,
736
+ full_reload,
709
737
  verbose,
710
738
  inspector,
711
739
  no_logs,
hud/cli/eval.py CHANGED
@@ -26,15 +26,6 @@ def build_agent(
26
26
  """Create and return the requested agent type."""
27
27
 
28
28
  # Import agents lazily to avoid dependency issues
29
- try:
30
- from hud.agents.misc.response_agent import ResponseAgent
31
- except ImportError as e:
32
- design.error(
33
- "Agent dependencies are not installed. "
34
- "Please install with: pip install 'hud-python[agent]'"
35
- )
36
- raise typer.Exit(1) from e
37
-
38
29
  if agent_type == "openai":
39
30
  try:
40
31
  from hud.agents import OperatorAgent
@@ -45,12 +36,12 @@ def build_agent(
45
36
  )
46
37
  raise typer.Exit(1) from e
47
38
 
48
- allowed_tools = allowed_tools or ["openai_computer"]
49
-
50
- return OperatorAgent(
51
- allowed_tools=allowed_tools,
52
- response_agent=ResponseAgent(),
53
- )
39
+ if allowed_tools:
40
+ return OperatorAgent(
41
+ allowed_tools=allowed_tools,
42
+ )
43
+ else:
44
+ return OperatorAgent()
54
45
 
55
46
  # Fallback Claude agent (Anthropic)
56
47
  try:
@@ -63,13 +54,16 @@ def build_agent(
63
54
  raise typer.Exit(1) from e
64
55
 
65
56
  model = model or "claude-sonnet-4-20250514"
66
- allowed_tools = allowed_tools or ["anthropic_computer"]
67
57
 
68
- return ClaudeAgent(
69
- model=model,
70
- allowed_tools=allowed_tools,
71
- response_agent=ResponseAgent(),
72
- )
58
+ if allowed_tools:
59
+ return ClaudeAgent(
60
+ model=model,
61
+ allowed_tools=allowed_tools,
62
+ )
63
+ else:
64
+ return ClaudeAgent(
65
+ model=model,
66
+ )
73
67
 
74
68
 
75
69
  async def run_single_task(
@@ -100,8 +94,8 @@ async def run_single_task(
100
94
  with open(path) as f: # noqa: ASYNC230
101
95
  json_data = json.load(f)
102
96
 
103
- # Check if JSON contains a list of tasks
104
- if isinstance(json_data, list):
97
+ # Check if JSON contains multiple tasks (list with more than 1 task)
98
+ if isinstance(json_data, list) and len(json_data) > 1:
105
99
  design.info(f"Found {len(json_data)} tasks in JSON file, running as dataset…")
106
100
 
107
101
  # Build agent class and config for run_dataset
@@ -118,8 +112,10 @@ async def run_single_task(
118
112
  raise typer.Exit(1) from e
119
113
 
120
114
  agent_config: dict[str, Any] = {
121
- "allowed_tools": allowed_tools or ["openai_computer"],
122
115
  }
116
+ if allowed_tools:
117
+ agent_config["allowed_tools"] = allowed_tools
118
+
123
119
  else:
124
120
  try:
125
121
  from hud.agents import ClaudeAgent
@@ -134,8 +130,9 @@ async def run_single_task(
134
130
 
135
131
  agent_config = {
136
132
  "model": model or "claude-sonnet-4-20250514",
137
- "allowed_tools": allowed_tools or ["anthropic_computer"],
138
133
  }
134
+ if allowed_tools:
135
+ agent_config["allowed_tools"] = allowed_tools
139
136
 
140
137
  # Run as dataset with single-task concurrency to maintain debug behavior
141
138
  results = await run_dataset(
@@ -146,7 +143,6 @@ async def run_single_task(
146
143
  max_concurrent=1, # Run sequentially for debug mode
147
144
  metadata={"source": str(path)},
148
145
  max_steps=max_steps,
149
- auto_respond=True,
150
146
  )
151
147
 
152
148
  # Display summary
@@ -154,8 +150,15 @@ async def run_single_task(
154
150
  design.success(f"Completed {len(results)} tasks: {successful} successful")
155
151
  return
156
152
 
157
- # Single task JSON
158
- task = Task(**json_data)
153
+ # Single task JSON (either direct object or list with 1 task)
154
+ if isinstance(json_data, list) and len(json_data) == 1:
155
+ design.info("Found 1 task in JSON file, running as single task…")
156
+ task = Task(**json_data[0])
157
+ elif isinstance(json_data, dict):
158
+ task = Task(**json_data)
159
+ else:
160
+ design.error("JSON file must contain a list of tasks when using --full flag")
161
+ raise typer.Exit(1)
159
162
  else:
160
163
  # Load from HuggingFace dataset
161
164
  try:
@@ -238,8 +241,10 @@ async def run_full_dataset(
238
241
  raise typer.Exit(1) from e
239
242
 
240
243
  agent_config: dict[str, Any] = {
241
- "allowed_tools": allowed_tools or ["openai_computer"],
242
244
  }
245
+ if allowed_tools:
246
+ agent_config["allowed_tools"] = allowed_tools
247
+
243
248
  else:
244
249
  try:
245
250
  from hud.agents import ClaudeAgent
@@ -254,8 +259,9 @@ async def run_full_dataset(
254
259
 
255
260
  agent_config = {
256
261
  "model": model or "claude-sonnet-4-20250514",
257
- "allowed_tools": allowed_tools or ["anthropic_computer"],
258
262
  }
263
+ if allowed_tools:
264
+ agent_config["allowed_tools"] = allowed_tools
259
265
 
260
266
  design.info("🚀 Running evaluation…")
261
267
  return await run_dataset(
@@ -266,7 +272,6 @@ async def run_full_dataset(
266
272
  max_concurrent=max_concurrent,
267
273
  metadata={"dataset": source},
268
274
  max_steps=max_steps,
269
- auto_respond=True,
270
275
  )
271
276
 
272
277
 
hud/cli/init.py CHANGED
@@ -15,20 +15,14 @@ DOCKERFILE_TEMPLATE = """FROM python:3.11-slim
15
15
 
16
16
  WORKDIR /app
17
17
 
18
- # Install git for hud-python dependency
19
- RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
20
-
21
18
  # Copy and install dependencies
22
19
  COPY pyproject.toml ./
23
20
  COPY src/ ./src/
24
21
  RUN pip install --no-cache-dir -e .
25
22
 
26
- # Set logging to stderr
27
- ENV HUD_LOG_STREAM=stderr
28
-
29
23
  # Start context server in background, then MCP server
30
- CMD ["sh", "-c", "python -m hud_controller.context & sleep 1 && exec python -m hud_controller.server"]
31
- """ # noqa: E501
24
+ CMD ["sh", "-c", "python -m controller.env & sleep 1 && exec python -m controller.server"]
25
+ """
32
26
 
33
27
  PYPROJECT_TEMPLATE = """[project]
34
28
  name = "{name}"
@@ -50,112 +44,472 @@ image = "{name}:dev"
50
44
  allow-direct-references = true
51
45
 
52
46
  [tool.hatch.build.targets.wheel]
53
- packages = ["src/hud_controller"]
47
+ packages = ["src/controller"]
54
48
  """
55
49
 
56
- CONTEXT_TEMPLATE = '''"""Minimal context that persists across hot-reloads."""
50
+ ENV_TEMPLATE = '''"""Minimal environment that persists across hot-reloads."""
57
51
  from hud.server.context import run_context_server
58
52
  import asyncio
59
53
 
60
- class Context:
54
+ class Environment:
55
+ """Simple counter environment."""
56
+
61
57
  def __init__(self):
62
58
  self.count = 0
63
59
 
64
60
  def act(self):
61
+ """Increment the counter."""
65
62
  self.count += 1
66
63
  return self.count
67
64
 
68
65
  def get_count(self):
66
+ """Get current counter."""
69
67
  return self.count
68
+
69
+ def reset(self):
70
+ """Reset counter to zero."""
71
+ self.count = 0
70
72
 
71
73
  if __name__ == "__main__":
72
- asyncio.run(run_context_server(Context()))
74
+ asyncio.run(run_context_server(Environment(), sock_path="/tmp/hud_ctx.sock"))
73
75
  '''
74
76
 
75
77
  SERVER_TEMPLATE = '''"""Minimal MCP server for HUD."""
78
+ import sys
79
+ import logging
76
80
  from hud.server import MCPServer
77
81
  from hud.server.context import attach_context
82
+ from hud.tools.types import EvaluationResult
83
+
84
+ # Configure logging to stderr
85
+ logging.basicConfig(
86
+ stream=sys.stderr,
87
+ level=logging.INFO,
88
+ format='[%(levelname)s] %(asctime)s | %(name)s | %(message)s'
89
+ )
78
90
 
79
91
  mcp = MCPServer(name="{name}")
80
- ctx = None
92
+ env = None
81
93
 
82
94
  @mcp.initialize
83
- async def init(init_ctx):
84
- global ctx
85
- ctx = attach_context("/tmp/hud_ctx.sock")
95
+ async def init(ctx):
96
+ global env
97
+ env = attach_context("/tmp/hud_ctx.sock")
98
+ logging.info("Connected to context server")
86
99
 
87
100
  @mcp.shutdown
88
101
  async def cleanup():
89
- global ctx
90
- ctx = None
102
+ global env
103
+ env = None
91
104
 
92
105
  @mcp.tool()
93
106
  async def act() -> str:
94
- """Perform an action."""
95
- return f"Action #{{ctx.act()}}"
107
+ """Perform an action that changes the environment state."""
108
+ if env is None:
109
+ raise RuntimeError("Context not initialized")
110
+ count = env.act()
111
+ return f"Action #{{count}} performed. Current count: {{count}}"
96
112
 
97
113
  @mcp.tool()
98
114
  async def setup() -> str:
99
- """Required for HUD environments."""
100
- return "Ready"
115
+ """Reset the environment to initial state."""
116
+ if env is None:
117
+ raise RuntimeError("Context not initialized")
118
+ env.reset()
119
+ return "Counter reset to 0"
101
120
 
102
121
  @mcp.tool()
103
- async def evaluate() -> dict:
104
- """Required for HUD environments."""
105
- return {{"count": ctx.get_count()}}
122
+ async def evaluate(target: int = 10) -> EvaluationResult:
123
+ """Check if the counter reached the target value."""
124
+ if env is None:
125
+ raise RuntimeError("Context not initialized")
126
+ current_count = env.get_count()
127
+
128
+ # Calculate reward as progress towards target
129
+ reward = min(current_count / target, 1.0) if target > 0 else 0.0
130
+ done = current_count >= target
131
+
132
+ return EvaluationResult(
133
+ reward=reward,
134
+ done=done,
135
+ content=f"Counter at {{current_count}}/{{target}}"
136
+ )
106
137
 
107
138
  if __name__ == "__main__":
108
139
  mcp.run()
109
140
  '''
110
141
 
142
+ TASKS_JSON_TEMPLATE = '''[
143
+ {{
144
+ "prompt": "Increment the counter to reach 10",
145
+ "mcp_config": {{
146
+ "{name}": {{
147
+ "url": "http://localhost:8765/mcp"
148
+ }}
149
+ }},
150
+ "setup_tool": {{
151
+ "name": "setup",
152
+ "arguments": {{}}
153
+ }},
154
+ "evaluate_tool": {{
155
+ "name": "evaluate",
156
+ "arguments": {{
157
+ "target": 10
158
+ }}
159
+ }}
160
+ }}
161
+ ]
162
+ '''
163
+
164
+ TEST_TASK_TEMPLATE = '''#!/usr/bin/env python
165
+ """Simple example of running tasks from tasks.json.
166
+
167
+ Make sure to run 'hud dev --build' in another terminal first!
168
+ """
169
+
170
+ import asyncio
171
+ import json
172
+ from hud.datasets import Task
173
+ from hud.clients import MCPClient
174
+
175
+
176
+ async def run_task(task_data: dict):
177
+ task = Task(**task_data)
178
+ client = MCPClient(mcp_config=task.mcp_config)
179
+
180
+ try:
181
+ print("Initializing client...")
182
+ await client.initialize()
183
+
184
+ result = await client.call_tool(task.setup_tool) # type: ignore
185
+ print(f"✅ Setup: {{result.content}}")
186
+
187
+ print("\\n🔄 Performing actions:")
188
+ for _ in range(10):
189
+ result = await client.call_tool(name="act", arguments={{}})
190
+ print(f" {{result.content}}")
191
+
192
+ result = await client.call_tool(task.evaluate_tool) # type: ignore
193
+ print(f"\\n📊 Evaluation: {{result.content}}")
194
+
195
+ return result.content
196
+ except Exception as e:
197
+ if "connection" in str(e).lower():
198
+ print("❌ Could not connect. Make sure 'hud dev --build' is running in another terminal.")
199
+ else:
200
+ raise e
201
+ finally:
202
+ await client.shutdown()
203
+
204
+
205
+ async def main():
206
+ for task_data in json.load(open("tasks.json")):
207
+ await run_task(task_data)
208
+
209
+ if __name__ == "__main__":
210
+ asyncio.run(main())
211
+ '''
212
+
213
+ NOTEBOOK_TEMPLATE = '''{{
214
+ "cells": [
215
+ {{
216
+ "cell_type": "markdown",
217
+ "metadata": {{}},
218
+ "source": [
219
+ "### Step 1: Create a Task\\n",
220
+ "\\n",
221
+ "A Task combines:\\n",
222
+ "- **Prompt**: What we want an agent to accomplish\\n",
223
+ "- **MCP Config**: How to spawn the environment\\n",
224
+ "- **Setup Tool**: How to prepare the environment\\n",
225
+ "- **Evaluate Tool**: How to check if the task succeeded"
226
+ ]
227
+ }},
228
+ {{
229
+ "cell_type": "code",
230
+ "execution_count": null,
231
+ "metadata": {{}},
232
+ "outputs": [],
233
+ "source": [
234
+ "from hud.datasets import Task\\n",
235
+ "from hud.types import MCPToolCall\\n",
236
+ "\\n",
237
+ "# Create a task that uses our {name} environment\\n",
238
+ "# See tasks.json for how to build a loadable task dataset\\n",
239
+ "task = Task(\\n",
240
+ " prompt=\\"Increment the counter to reach 10\\",\\n",
241
+ " mcp_config={{\\n",
242
+ " \\"{name}\\": {{\\n",
243
+ " \\"url\\": \\"http://localhost:8765/mcp\\"\\n",
244
+ " }},\\n",
245
+ " }},\\n",
246
+ " setup_tool=MCPToolCall(name=\\"setup\\", arguments={{}}),\\n",
247
+ " evaluate_tool=MCPToolCall(name=\\"evaluate\\", arguments={{\\"target\\": 10}}),\\n",
248
+ ")"
249
+ ]
250
+ }},
251
+ {{
252
+ "cell_type": "markdown",
253
+ "metadata": {{}},
254
+ "source": [
255
+ "### Step 2: Initialize MCP Client\\n",
256
+ "\\n",
257
+ "Run `hud dev --build` before this cell to intialize the server at `http://localhost:8765/mcp`"
258
+ ]
259
+ }},
260
+ {{
261
+ "cell_type": "code",
262
+ "execution_count": null,
263
+ "metadata": {{}},
264
+ "outputs": [],
265
+ "source": [
266
+ "from hud.clients import MCPClient\\n",
267
+ "\\n",
268
+ "# Create the client\\n",
269
+ "client = MCPClient(mcp_config=task.mcp_config, auto_trace=False)\\n",
270
+ "\\n",
271
+ "# Initialize it (this connects to our dev server)\\n",
272
+ "await client.initialize()"
273
+ ]
274
+ }},
275
+ {{
276
+ "cell_type": "markdown",
277
+ "metadata": {{}},
278
+ "source": [
279
+ "### Step 3: Run Setup\\n",
280
+ "\\n",
281
+ "Call the setup tool to prepare the environment according to the task."
282
+ ]
283
+ }},
284
+ {{
285
+ "cell_type": "code",
286
+ "execution_count": null,
287
+ "metadata": {{}},
288
+ "outputs": [],
289
+ "source": [
290
+ "# Run the setup from our task\\n",
291
+ "setup_result = await client.call_tool(task.setup_tool) # type: ignore\\n",
292
+ "print(f\\"Setup result: {{setup_result}}\\")"
293
+ ]
294
+ }},
295
+ {{
296
+ "cell_type": "markdown",
297
+ "metadata": {{}},
298
+ "source": [
299
+ "### Step 4: Perform Actions\\n",
300
+ "\\n",
301
+ "Now we'll manually perform actions to complete the task. In a real scenario, an AI agent would figure out what actions to take."
302
+ ]
303
+ }},
304
+ {{
305
+ "cell_type": "code",
306
+ "execution_count": null,
307
+ "metadata": {{}},
308
+ "outputs": [],
309
+ "source": [
310
+ "# Increment the counter 10 times\\n",
311
+ "for i in range(10):\\n",
312
+ " result = await client.call_tool(name=\\"act\\", arguments={{}})\\n",
313
+ " print(f\\"Step {{i+1}}: {{result.content}}\\")"
314
+ ]
315
+ }},
316
+ {{
317
+ "cell_type": "markdown",
318
+ "metadata": {{}},
319
+ "source": [
320
+ "## Step 5: Evaluate Success\\n",
321
+ "\\n",
322
+ "Check if we completed the task according to the evaluation criteria."
323
+ ]
324
+ }},
325
+ {{
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "metadata": {{}},
329
+ "outputs": [],
330
+ "source": [
331
+ "# Run the evaluation from our task\\n",
332
+ "eval_result = await client.call_tool(task.evaluate_tool) # type: ignore\\n",
333
+ "\\n",
334
+ "# The result is a list with one TextContent item containing JSON\\n",
335
+ "print(eval_result)"
336
+ ]
337
+ }},
338
+ {{
339
+ "cell_type": "markdown",
340
+ "metadata": {{}},
341
+ "source": [
342
+ "### Step 6: Cleanup\\n",
343
+ "\\n",
344
+ "Always shut down the client when done to stop the Docker container. Either stop hud dev in the terminal, or run this command:"
345
+ ]
346
+ }},
347
+ {{
348
+ "cell_type": "code",
349
+ "execution_count": null,
350
+ "metadata": {{}},
351
+ "outputs": [],
352
+ "source": [
353
+ "await client.shutdown()"
354
+ ]
355
+ }},
356
+ {{
357
+ "cell_type": "markdown",
358
+ "metadata": {{}},
359
+ "source": [
360
+ "### Bonus: Running with an AI Agent\\n",
361
+ "\\n",
362
+ "Instead of manually calling tools, you can have an AI agent solve the task automatically."
363
+ ]
364
+ }},
365
+ {{
366
+ "cell_type": "code",
367
+ "execution_count": null,
368
+ "metadata": {{}},
369
+ "outputs": [],
370
+ "source": [
371
+ "# Uncomment to run with Claude (requires ANTHROPIC_API_KEY)\\n",
372
+ "from hud.agents import ClaudeAgent\\n",
373
+ "\\n",
374
+ "# Create an agent\\n",
375
+ "agent = ClaudeAgent(\\n",
376
+ " model=\\"claude-sonnet-4-20250514\\",\\n",
377
+ " allowed_tools=[\\"act\\"] # Only allow the act tool\\n",
378
+ ")\\n",
379
+ "\\n",
380
+ "# Run the task\\n",
381
+ "result = await agent.run(task)\\n",
382
+ "print(f\\"Final reward: {{result.reward}}\\")"
383
+ ]
384
+ }},
385
+ {{
386
+ "cell_type": "markdown",
387
+ "metadata": {{}},
388
+ "source": [
389
+ "### Next Steps\\n",
390
+ "\\n",
391
+ "1. **Create your own evaluators**: Add new evaluation functions to `server.py`\\n",
392
+ "2. **Build complex environments**: Replace the simple counter with your actual application\\n",
393
+ "3. **Test with agents**: Use different AI models to solve your tasks\\n",
394
+ "\\n",
395
+ "For more examples, check out:\\n",
396
+ "- `environments/text_2048/` - A complete 2048 game environment\\n",
397
+ "- `environments/browser/` - A full browser automation environment with GUI"
398
+ ]
399
+ }},
400
+ {{
401
+ "cell_type": "code",
402
+ "execution_count": null,
403
+ "metadata": {{}},
404
+ "outputs": [],
405
+ "source": []
406
+ }}
407
+ ],
408
+ "metadata": {{
409
+ "kernelspec": {{
410
+ "display_name": "Python 3",
411
+ "language": "python",
412
+ "name": "python3"
413
+ }},
414
+ "language_info": {{
415
+ "codemirror_mode": {{
416
+ "name": "ipython",
417
+ "version": 3
418
+ }},
419
+ "file_extension": ".py",
420
+ "mimetype": "text/x-python",
421
+ "name": "python",
422
+ "nbconvert_exporter": "python",
423
+ "pygments_lexer": "ipython3",
424
+ "version": "3.11.0"
425
+ }}
426
+ }},
427
+ "nbformat": 4,
428
+ "nbformat_minor": 4
429
+ }}
430
+ '''
431
+
111
432
  README_TEMPLATE = '''# {title}
112
433
 
113
- A minimal HUD environment created with `hud init`.
434
+ A minimal HUD environment demonstrating the Task pattern with a simple counter.
114
435
 
115
436
  ## Quick Start
116
437
 
438
+ ### Interactive Development
117
439
  ```bash
118
- # Build and run locally
119
- hud dev
440
+ # 1. Start the environment (optional: with inspector)
441
+ hud dev --build --inspector
442
+
443
+ # 2. Choose your preferred way to test:
444
+
445
+ # Option A: Interactive notebook test_env.ipynb (great for learning!)
120
446
 
121
- # Or build first
122
- docker build -t {name}:dev .
123
- hud dev --image {name}:dev
447
+ # Option B: Simple Python script (runs all tasks from tasks.json)
448
+ python test_task.py
124
449
  ```
125
450
 
126
- ## Structure
451
+ ### Run with an Agent
452
+ ```bash
453
+ # Run the task with Claude
454
+ hud eval tasks.json --agent claude
455
+ ```
127
456
 
128
- - `src/hud_controller/server.py` - MCP server with tools
129
- - `src/hud_controller/context.py` - Persistent state across hot-reloads
130
- - `Dockerfile` - Container configuration
131
- - `pyproject.toml` - Python dependencies
457
+ ## How HUD Environments Work
132
458
 
133
- ## Adding Tools
459
+ The environment is split into two components:
134
460
 
135
- Add new tools to `server.py`:
461
+ - **`env.py`** - Stateful logic that persists across reloads
462
+ - **`server.py`** - MCP server with tools (reloads on file changes)
136
463
 
137
- ```python
138
- @mcp.tool()
139
- async def my_tool(param: str) -> str:
140
- """Tool description."""
141
- return f"Result: {{param}}"
464
+ This separation is crucial for `hud dev` - it allows you to modify the MCP tools and see changes immediately without losing the environment state. The environment runs as a separate process and communicates via socket, while the server can be restarted freely.
465
+
466
+ If you are ever seeing issues with the environment itself, running `hud dev --full-reload` will reload both the environment and the server.
467
+
468
+ ## Publishing Your Environment
469
+
470
+ Once your environment is ready, you can share it with the community:
471
+
472
+ ### 1. Push to Registry
473
+ ```bash
474
+ # Build and push your environment (this requires docker hub login and hud api key)
475
+ hud build
476
+ hud push
142
477
  ```
143
478
 
144
- ## Adding State
479
+ ### 2. Create a Dataset
145
480
 
146
- Extend the `Context` class in `context.py`:
481
+ Create a dataset on HuggingFace with your tasks:
147
482
 
483
+ **Option A: Upload manually**
484
+ 1. Upload your `tasks.json` to HuggingFace
485
+ 2. Make sure it's **public** to appear on leaderboards
486
+
487
+ **Option B: Use the SDK**
148
488
  ```python
149
- class Context:
150
- def __init__(self):
151
- self.count = 0
152
- self.data = {{}} # Add your state
489
+ from hud.datasets import save_tasks
490
+ import json
491
+
492
+ # Load your tasks
493
+ with open("tasks.json") as f:
494
+ tasks = json.load(f)
495
+
496
+ # Push to HuggingFace
497
+ save_tasks(tasks, repo_id="your-org/your-dataset")
153
498
  ```
154
499
 
155
- ## Learn More
500
+ ### 3. Run and Track Performance
156
501
 
157
- - [HUD Documentation](https://docs.hud.so)
158
- - [MCP Specification](https://modelcontextprotocol.io)
502
+ ```bash
503
+ # Run Claude on your benchmark
504
+ hud eval "your-org/your-dataset" --agent claude
505
+
506
+ # View results at:
507
+ # app.hud.so/leaderboards/your-org/your-dataset
508
+ ```
509
+
510
+ **Note**: Only public HuggingFace datasets appear as leaderboards!
511
+
512
+ 📚 Learn more: [Creating Benchmarks](https://docs.hud.so/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.so/evaluate-agents/leaderboards)
159
513
  '''
160
514
 
161
515
 
@@ -202,7 +556,7 @@ def create_environment(name: str | None, directory: str, force: bool) -> None:
202
556
  design.warning(f"Overwriting existing files in {target_dir}")
203
557
 
204
558
  # Create directory structure
205
- src_dir = target_dir / "src" / "hud_controller"
559
+ src_dir = target_dir / "src" / "controller"
206
560
  src_dir.mkdir(parents=True, exist_ok=True)
207
561
 
208
562
  # Write files with proper formatting
@@ -210,37 +564,54 @@ def create_environment(name: str | None, directory: str, force: bool) -> None:
210
564
 
211
565
  # Dockerfile
212
566
  dockerfile_path = target_dir / "Dockerfile"
213
- dockerfile_path.write_text(DOCKERFILE_TEMPLATE.strip() + "\n")
567
+ dockerfile_path.write_text(DOCKERFILE_TEMPLATE.strip() + "\n", encoding="utf-8")
214
568
  files_created.append("Dockerfile")
215
569
 
216
570
  # pyproject.toml
217
571
  pyproject_path = target_dir / "pyproject.toml"
218
572
  pyproject_content = PYPROJECT_TEMPLATE.format(name=package_name).strip() + "\n"
219
- pyproject_path.write_text(pyproject_content)
573
+ pyproject_path.write_text(pyproject_content, encoding="utf-8")
220
574
  files_created.append("pyproject.toml")
221
575
 
222
576
  # README.md
223
577
  readme_path = target_dir / "README.md"
224
578
  readme_content = README_TEMPLATE.format(name=package_name, title=name).strip() + "\n"
225
- readme_path.write_text(readme_content)
579
+ readme_path.write_text(readme_content, encoding="utf-8")
226
580
  files_created.append("README.md")
227
581
 
228
582
  # Python files
229
583
  # __init__.py
230
584
  init_path = src_dir / "__init__.py"
231
- init_path.write_text('"""HUD Controller Package"""\n')
232
- files_created.append("src/hud_controller/__init__.py")
585
+ init_path.write_text('"""Controller Package"""\n', encoding="utf-8")
586
+ files_created.append("src/controller/__init__.py")
233
587
 
234
- # context.py
235
- context_path = src_dir / "context.py"
236
- context_path.write_text(CONTEXT_TEMPLATE.strip() + "\n")
237
- files_created.append("src/hud_controller/context.py")
588
+ # env.py
589
+ env_path = src_dir / "env.py"
590
+ env_path.write_text(ENV_TEMPLATE.strip() + "\n", encoding="utf-8")
591
+ files_created.append("src/controller/env.py")
238
592
 
239
593
  # server.py (need to escape the double braces for .format())
240
594
  server_path = src_dir / "server.py"
241
595
  server_content = SERVER_TEMPLATE.format(name=package_name).strip() + "\n"
242
- server_path.write_text(server_content)
243
- files_created.append("src/hud_controller/server.py")
596
+ server_path.write_text(server_content, encoding="utf-8")
597
+ files_created.append("src/controller/server.py")
598
+
599
+ # tasks.json
600
+ tasks_path = target_dir / "tasks.json"
601
+ tasks_content = TASKS_JSON_TEMPLATE.format(name=package_name).strip() + "\n"
602
+ tasks_path.write_text(tasks_content, encoding="utf-8")
603
+ files_created.append("tasks.json")
604
+
605
+ # test_task.py
606
+ test_task_path = target_dir / "test_task.py"
607
+ test_task_path.write_text(TEST_TASK_TEMPLATE.strip() + "\n", encoding="utf-8")
608
+ files_created.append("test_task.py")
609
+
610
+ # notebook.ipynb
611
+ notebook_path = target_dir / "test_env.ipynb"
612
+ notebook_content = NOTEBOOK_TEMPLATE.format(name=package_name).strip() + "\n"
613
+ notebook_path.write_text(notebook_content, encoding="utf-8")
614
+ files_created.append("test_env.ipynb")
244
615
 
245
616
  # Success message
246
617
  design.header(f"Created HUD Environment: {name}")
@@ -264,16 +635,24 @@ def create_environment(name: str | None, directory: str, force: bool) -> None:
264
635
  design.info("\n3. Connect from Cursor or test via the MCP inspector:")
265
636
  design.info(" Follow the instructions shown by hud dev --inspector")
266
637
 
267
- design.info("\n4. Customize your environment:")
268
- design.info(" - Add tools to src/hud_controller/server.py")
269
- design.info(" - Add state to src/hud_controller/context.py")
638
+ design.info("\n4. Test your environment:")
639
+ design.command_example("python test_task.py")
640
+
641
+ design.info("\n5. Customize your environment:")
642
+ design.info(" - Add tools to src/controller/server.py")
643
+ design.info(" - Add state to src/controller/env.py")
644
+ design.info(" - Modify tasks in tasks.json")
645
+ design.info(" - Experiment in test_env.ipynb")
270
646
 
271
647
  # Show a sample of the server code
272
648
  design.section_title("Your MCP server")
273
649
  sample_code = '''@mcp.tool()
274
650
  async def act() -> str:
275
- """Perform an action."""
276
- return f"Action #{ctx.act()}"'''
651
+ """Perform an action that changes the environment state."""
652
+ if env is None:
653
+ raise RuntimeError("Context not initialized")
654
+ count = env.act()
655
+ return f"Action #{count} performed. Current count: {count}"'''
277
656
 
278
657
  syntax = Syntax(sample_code, "python", theme="monokai", line_numbers=False)
279
658
  design.console.print(Panel(syntax, border_style="dim"))
hud/cli/list_func.py CHANGED
@@ -169,7 +169,7 @@ def list_environments(
169
169
 
170
170
  table.add_row(*row)
171
171
 
172
- design.print(str(table))
172
+ design.print(table) # type: ignore
173
173
  design.info("")
174
174
 
175
175
  # Show usage hints
hud/clients/fastmcp.py CHANGED
@@ -106,19 +106,9 @@ class FastMCPHUDClient(BaseHUDClient):
106
106
 
107
107
  # Configure validation for output schemas based on client setting
108
108
  try:
109
- from hud_mcp.client.session import ( # type: ignore[import-not-found]
110
- ValidationOptions, # type: ignore[import-not-found]
111
- )
112
-
113
- if (
114
- hasattr(self._client, "_session_state")
115
- and self._client._session_state.session is not None
116
- ):
117
- self._client._session_state.session._validation_options = ValidationOptions( # type: ignore[attr-defined]
118
- strict_output_validation=self._strict_validation
119
- )
109
+ if hasattr(self._client, "_session_state") and self._client._session_state.session is not None: # noqa: E501
110
+ self._client._session_state.session._validate_structured_outputs = self._strict_validation # noqa: E501
120
111
  except ImportError:
121
- # ValidationOptions may not be available in some mcp versions
122
112
  pass
123
113
 
124
114
  logger.info("FastMCP client connected")
hud/clients/mcp_use.py CHANGED
@@ -73,19 +73,13 @@ class MCPUseHUDClient(BaseHUDClient):
73
73
 
74
74
  # Configure validation for all sessions based on client setting
75
75
  try:
76
- from hud_mcp.client.session import ( # type: ignore[import-not-found]
77
- ValidationOptions, # type: ignore[import-not-found]
78
- )
79
-
80
76
  for session in self._sessions.values():
81
77
  if (
82
78
  hasattr(session, "connector")
83
79
  and hasattr(session.connector, "client_session")
84
80
  and session.connector.client_session is not None
85
81
  ):
86
- session.connector.client_session._validation_options = ValidationOptions(
87
- strict_output_validation=self._strict_validation
88
- )
82
+ session.connector.client_session._validate_structured_outputs = self._strict_validation # noqa: E501
89
83
  except ImportError:
90
84
  # ValidationOptions may not be available in some mcp versions
91
85
  pass
@@ -30,11 +30,15 @@ def install_mcp_instrumentation(provider: TracerProvider) -> None:
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
32
  try:
33
+ # First, patch the _instruments to use our fork
34
+ import opentelemetry.instrumentation.mcp.instrumentation as mcp_inst
35
+ mcp_inst._instruments = ("hud-mcp-python-sdk >= 3.13.1",)
36
+
33
37
  from opentelemetry.instrumentation.mcp.instrumentation import (
34
38
  McpInstrumentor,
35
39
  )
36
40
 
37
- # First, patch the instrumentation to handle 3-value transports correctly
41
+ # Then, patch the instrumentation to handle 3-value transports correctly
38
42
  _patch_mcp_instrumentation()
39
43
 
40
44
  McpInstrumentor().instrument(tracer_provider=provider)
hud/server/server.py CHANGED
@@ -116,7 +116,7 @@ class MCPServer(FastMCP):
116
116
 
117
117
  # Replace FastMCP's low-level server with our version that supports
118
118
  # per-server initialization hooks
119
- def _run_init(ctx: RequestContext) -> Any:
119
+ def _run_init(ctx: RequestContext | None = None) -> Any:
120
120
  if self._initializer_fn is not None and not self._did_init:
121
121
  self._did_init = True
122
122
  # Redirect stdout to stderr during initialization to prevent
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.12"
8
+ assert hud.__version__ == "0.4.14"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.12"
7
+ __version__ = "0.4.14"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.12
3
+ Version: 0.4.14
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,10 +35,9 @@ Classifier: Programming Language :: Python :: 3.11
35
35
  Classifier: Programming Language :: Python :: 3.12
36
36
  Classifier: Programming Language :: Python :: 3.13
37
37
  Requires-Python: <3.14,>=3.11
38
- Requires-Dist: fastmcp>=2.11.2
39
38
  Requires-Dist: httpx<1,>=0.23.0
40
- Requires-Dist: hud-mcp-python-sdk>=0.1.0
41
- Requires-Dist: mcp>=1.13.1
39
+ Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
40
+ Requires-Dist: hud-mcp-python-sdk>=3.13.2
42
41
  Requires-Dist: opentelemetry-api>=1.34.1
43
42
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
44
43
  Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
@@ -56,6 +55,7 @@ Provides-Extra: agent
56
55
  Requires-Dist: anthropic; extra == 'agent'
57
56
  Requires-Dist: datasets>=2.14.0; extra == 'agent'
58
57
  Requires-Dist: dotenv>=0.9.9; extra == 'agent'
58
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
59
59
  Requires-Dist: ipykernel; extra == 'agent'
60
60
  Requires-Dist: ipython<9; extra == 'agent'
61
61
  Requires-Dist: jupyter-client; extra == 'agent'
@@ -63,13 +63,13 @@ Requires-Dist: jupyter-core; extra == 'agent'
63
63
  Requires-Dist: langchain; extra == 'agent'
64
64
  Requires-Dist: langchain-anthropic; extra == 'agent'
65
65
  Requires-Dist: langchain-openai; extra == 'agent'
66
- Requires-Dist: mcp-use; extra == 'agent'
67
66
  Requires-Dist: numpy>=1.24.0; extra == 'agent'
68
67
  Requires-Dist: openai; extra == 'agent'
69
68
  Provides-Extra: agents
70
69
  Requires-Dist: anthropic; extra == 'agents'
71
70
  Requires-Dist: datasets>=2.14.0; extra == 'agents'
72
71
  Requires-Dist: dotenv>=0.9.9; extra == 'agents'
72
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
73
73
  Requires-Dist: ipykernel; extra == 'agents'
74
74
  Requires-Dist: ipython<9; extra == 'agents'
75
75
  Requires-Dist: jupyter-client; extra == 'agents'
@@ -77,7 +77,6 @@ Requires-Dist: jupyter-core; extra == 'agents'
77
77
  Requires-Dist: langchain; extra == 'agents'
78
78
  Requires-Dist: langchain-anthropic; extra == 'agents'
79
79
  Requires-Dist: langchain-openai; extra == 'agents'
80
- Requires-Dist: mcp-use; extra == 'agents'
81
80
  Requires-Dist: numpy>=1.24.0; extra == 'agents'
82
81
  Requires-Dist: openai; extra == 'agents'
83
82
  Provides-Extra: dev
@@ -85,6 +84,7 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
85
84
  Requires-Dist: anthropic; extra == 'dev'
86
85
  Requires-Dist: datasets>=2.14.0; extra == 'dev'
87
86
  Requires-Dist: dotenv>=0.9.9; extra == 'dev'
87
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
88
88
  Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
89
89
  Requires-Dist: ipykernel; extra == 'dev'
90
90
  Requires-Dist: ipython<9; extra == 'dev'
@@ -93,7 +93,6 @@ Requires-Dist: jupyter-core; extra == 'dev'
93
93
  Requires-Dist: langchain; extra == 'dev'
94
94
  Requires-Dist: langchain-anthropic; extra == 'dev'
95
95
  Requires-Dist: langchain-openai; extra == 'dev'
96
- Requires-Dist: mcp-use; extra == 'dev'
97
96
  Requires-Dist: numpy>=1.24.0; extra == 'dev'
98
97
  Requires-Dist: openai; extra == 'dev'
99
98
  Requires-Dist: pillow>=11.1.0; extra == 'dev'
@@ -3,7 +3,7 @@ hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
3
3
  hud/datasets.py,sha256=8lqC840kcNx01D2CcWZCd1j0eZTpepILmQrvohZIZYU,12056
4
4
  hud/settings.py,sha256=WIJDsyrfwBZGcaGT46YUOpW8xjBZl3siXXprd92ASAg,2039
5
5
  hud/types.py,sha256=pQWOPYXUZ2hhK0h-AHBc3DCj5tkbRXHqKZnsQQIcSFA,4237
6
- hud/version.py,sha256=9ZxDFavFCdgSfY_Jd59VomoRB2HCKh4pQnAZlzaXWJ4,105
6
+ hud/version.py,sha256=EujFSzlsB3e5WmhxNLuJ-8DYtTfWdY6iOL9lPpx0r5U,105
7
7
  hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
8
8
  hud/agents/base.py,sha256=M2g7Cj5InE4EsXpmxqURprC3IHNGvNZFBZ8HPIQxz-A,24574
9
9
  hud/agents/claude.py,sha256=snbYFPW-KAkw4n9Rdz7dC2f46RuSHJKC53HPm8SucFM,14273
@@ -17,16 +17,16 @@ hud/agents/tests/test_base.py,sha256=F39ajSqASGUbPyPoWSY9KARFav62qNTK74W11Tr1Tg4
17
17
  hud/agents/tests/test_claude.py,sha256=wqEKlzEvx8obz1sSm4NY0j-Zyt1qWNfDOmRqYIuAEd0,13069
18
18
  hud/agents/tests/test_client.py,sha256=Sk5bGZw2hL5GsVi2LMp9tsLngl5ZQ18pkpeeQmts0ao,13908
19
19
  hud/agents/tests/test_openai.py,sha256=ZJqctxCbJtKw6TkJCP4D2xAcG8CkxzDXO7dh5IIWN_M,9175
20
- hud/cli/__init__.py,sha256=SwRcyG1UtyXqdai-kWduvyJvpo2WJmI9AffzmqUprzs,29987
20
+ hud/cli/__init__.py,sha256=ecjrYlswQB9JJsbxQFcKZVD0fn0ZWneKSLm_kBYWpQ0,30302
21
21
  hud/cli/__main__.py,sha256=fDH7XITyuDITwSDIVwRso06aouADO0CzTHKqp5TOwJE,143
22
22
  hud/cli/analyze.py,sha256=G-tjT1xLPLcYhDhZEaI7TAIS0z0OACUksnGFoAWd2ag,14416
23
23
  hud/cli/build.py,sha256=c8pg8iUlCT1-E4koEKFX1Nx8oGaB2ln57pHdOCCDAvs,19126
24
24
  hud/cli/clone.py,sha256=AwVDIuhr8mHb1oT2Af2HrD25SiTdwATpE6zd93vzLgA,6099
25
25
  hud/cli/debug.py,sha256=FNzg9-_ZzUJA1nJfubmop7_2OT5mqnWsdpZyi4AVSXA,14163
26
- hud/cli/dev.py,sha256=Tx0Pf54oeHJQytVRx4up3mYE9m_Gxo3esSkhTRTCVX8,27074
27
- hud/cli/eval.py,sha256=yBHwekweC2orpWRVpPBAFSgWtpKpz8Dsa5drla-iTpI,12425
28
- hud/cli/init.py,sha256=GH6ls9JZ_pXaiVmsLwfv45rdRXa1tAfAMybvQX9Ooqg,7659
29
- hud/cli/list_func.py,sha256=0pEC4XD1ReUza0EFfK4dFXAbUAwyVavm6zI9Z3bpPAw,7054
26
+ hud/cli/dev.py,sha256=ANsd34gHX08eQxeXz6atIuDyi7Tw8qngqvmDPAx-PI0,28640
27
+ hud/cli/eval.py,sha256=zrUoXYdSe5cVbWa5fc9-tNK9syBCtKOpKDvc0ApeYQU,12604
28
+ hud/cli/init.py,sha256=guJbNkVuFhc-c2jTEx_jZxzzPkJRtGTJapWk5hyuyd8,18710
29
+ hud/cli/list_func.py,sha256=ENxLL4X5uuqAASWZdQuI0k-tEzmlhUn5LATgz3QPQqQ,7065
30
30
  hud/cli/pull.py,sha256=JHwCwUwRO0Nzbgm9mkjsz6EpxbxgwQVhgNSY64nNZ-s,11969
31
31
  hud/cli/push.py,sha256=4KrEHj0_i3xJNCB3eRjANmHFhSW4MFfpnld3nfVYENs,17904
32
32
  hud/cli/remove.py,sha256=USAvB6pbMA3jd19xUtLEBiMsklVTEfE2Maw9nYcpSAE,6619
@@ -60,8 +60,8 @@ hud/cli/utils/server.py,sha256=uSx2DjG5vX-PFoD8zNH-gBHbkTNSHveFSVdAfmp09Tc,7341
60
60
  hud/clients/README.md,sha256=XNE3mch95ozDgVqfwCGcrhlHY9CwT1GKfNANNboowto,3826
61
61
  hud/clients/__init__.py,sha256=bcPIa7dwH5ENsjh7CzjsJ84fm7Ma93NBc2lGfSjGAKM,328
62
62
  hud/clients/base.py,sha256=ob8G7_Gi-aENnc0yxHpZmzuqBD-swn_jVWkY2Iw7F4k,13995
63
- hud/clients/fastmcp.py,sha256=w7psl3JRIGxyqznxHYyhsptY6d1obPfz6y7jpJimC7E,9502
64
- hud/clients/mcp_use.py,sha256=7yDzv_w0mmDbOWMIN0_AGuzeDiBWwq27tcoVkZJXhk4,12174
63
+ hud/clients/fastmcp.py,sha256=b1Q5HltWWmnAhj-Nv6T4T5gitDn5bEfqiLy5PU5yD9g,9102
64
+ hud/clients/mcp_use.py,sha256=qRHDJ6ELRISD4V9NVPAX5SNE3NZqyunPAqDdpBtaslg,11920
65
65
  hud/clients/tests/__init__.py,sha256=sKOtJFFa4mDIXh1U6O8ZUHjigE8CiRMQ2PzJTIBZuVE,33
66
66
  hud/clients/tests/test_client_integration.py,sha256=kohU6jfCNfwSnAushHeB1_CmDlRfQc7VBL0GEdJYSeI,4198
67
67
  hud/clients/tests/test_fastmcp.py,sha256=4q3TzDjuieTZa89taiNJIrzbUncNkYOG4MaubypA21k,13030
@@ -75,14 +75,14 @@ hud/otel/collector.py,sha256=jLZymZ8r7xt2VDuWexfbnT7PY1-0aiyLMgjBy8KDY1M,4497
75
75
  hud/otel/config.py,sha256=6np_C2UXhtKHHjY41HQxZElua2Eh_EUCBiRB_YuiSuc,6249
76
76
  hud/otel/context.py,sha256=C9MvO99cRSNNDEDC7ehO3eoTPnb6J7AemUYvEp57yEU,17774
77
77
  hud/otel/exporters.py,sha256=TP7SF6ySCP-gFV1i-u5-HbpYsK3n9GP3OjW_ZBfsj-w,14246
78
- hud/otel/instrumentation.py,sha256=ieKHEnMMSMZxPYvZokYJG-UQYXTNyEnaYG1lGY4zIHA,3575
78
+ hud/otel/instrumentation.py,sha256=xbRRmTDwDyCvJVm4iWmB65kXOhotTnv9GjwkufARBuk,3782
79
79
  hud/otel/processors.py,sha256=yI5BWsDBMEPfwMzD-iWbJd4KWH3qUDSe-5-C1yT6fjU,4615
80
80
  hud/otel/tests/__init__.py,sha256=VNJKBMaxTtbn7trW-1Ph50zCvCok_wTSGcI1HD6GOLA,43
81
81
  hud/otel/tests/test_processors.py,sha256=np0R4ssd9j6LJSJykJ5bNjl0POwNYNhgb7BqOZHwcMY,6778
82
82
  hud/server/__init__.py,sha256=8LUwgsXO8xiViWP7uImDwcOsWLu01r5F4r8U8qH3rSY,91
83
83
  hud/server/context.py,sha256=6bCdSzv1FGyItu9472HbbYef279H7QuMGJDR8EtYg5Y,3210
84
84
  hud/server/low_level.py,sha256=XYs2pOJ9kN4OcJ6ahDmXM5mWkzq5wJLpKFInUYrWEok,4701
85
- hud/server/server.py,sha256=q6JdnsnzX4WwBLTQCCuH0pjbYaRtbuQ9xtYuHiHpa6U,7999
85
+ hud/server/server.py,sha256=jx2JEGeVkV5wDVKM7Sb474uY4fd-c6azo7HS_SFYDxo,8013
86
86
  hud/server/helper/__init__.py,sha256=ZxO8VP3RZEBBp-q65VixuhzQgqEPSVzW0hEY9J9QqDA,116
87
87
  hud/server/tests/__init__.py,sha256=eEYYkxX5Hz9woXVOBJ2H2_CQoEih0vH6nRt3sH2Z8v8,49
88
88
  hud/shared/__init__.py,sha256=IPxPCqtPLguryN-nBq78Sakypw2bRiE2iHv3SXG8YRk,139
@@ -144,10 +144,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
144
144
  hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
145
145
  hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
146
146
  hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
147
- hud/utils/tests/test_version.py,sha256=uZwA5AKQVF3B6Tzw5vdMCGZlCKLkw6ZSQ9nnR6nk8XQ,160
147
+ hud/utils/tests/test_version.py,sha256=JXMZuhuGL6fqB8mARikOgFFMpmq1Y0rG-7kz7V43w5k,160
148
148
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- hud_python-0.4.12.dist-info/METADATA,sha256=SEioaFSGeMPXozmr2ePFpd-daQbRp8rnk-kYiGx1ETs,20176
150
- hud_python-0.4.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
151
- hud_python-0.4.12.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
152
- hud_python-0.4.12.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
153
- hud_python-0.4.12.dist-info/RECORD,,
149
+ hud_python-0.4.14.dist-info/METADATA,sha256=e7OCOwaSi0F_gPdM0CUg2buSpTx0wd9w4uny00NH2xM,20233
150
+ hud_python-0.4.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
151
+ hud_python-0.4.14.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
152
+ hud_python-0.4.14.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
153
+ hud_python-0.4.14.dist-info/RECORD,,