hud-python 0.4.54__py3-none-any.whl → 0.4.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/agents/base.py CHANGED
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal
11
11
 
12
12
  import mcp.types as types
13
13
 
14
+ from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
14
15
  from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
15
16
  from hud.utils.hud_console import HUDConsole
16
17
  from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
@@ -62,6 +63,7 @@ class MCPAgent(ABC):
62
63
  initial_screenshot: bool = True,
63
64
  # Misc
64
65
  model_name: str = "mcp-agent",
66
+ checkpoint_name: str | None = None,
65
67
  response_agent: ResponseAgent | None = None,
66
68
  auto_trace: bool = True,
67
69
  verbose: bool = False,
@@ -92,6 +94,7 @@ class MCPAgent(ABC):
92
94
  self._auto_created_client = False # Track if we created the client
93
95
 
94
96
  self.model_name = model_name
97
+ self.checkpoint_name = checkpoint_name
95
98
  self.console = HUDConsole(logger=logger)
96
99
 
97
100
  # Set verbose mode if requested
@@ -198,6 +201,8 @@ class MCPAgent(ABC):
198
201
  f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
199
202
  )
200
203
 
204
+ await log_agent_metadata_to_status(self.model_name, self.checkpoint_name)
205
+
201
206
  async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
202
207
  """
203
208
  Run the agent with the given prompt or task.
@@ -223,6 +228,9 @@ class MCPAgent(ABC):
223
228
 
224
229
  # Handle Task objects with full lifecycle
225
230
  if isinstance(prompt_or_task, Task):
231
+ # Log a compact summary of task config to the current trace (async)
232
+ await log_task_config_to_current_trace(prompt_or_task)
233
+
226
234
  return await self.run_task(prompt_or_task, max_steps)
227
235
 
228
236
  # Handle simple string prompts
hud/agents/claude.py CHANGED
@@ -89,7 +89,8 @@ class ClaudeAgent(MCPAgent):
89
89
  self.use_computer_beta = use_computer_beta
90
90
  self.hud_console = HUDConsole(logger=logger)
91
91
 
92
- self.model_name = self.model
92
+ self.model_name = "Claude"
93
+ self.checkpoint_name = self.model
93
94
 
94
95
  # Track mapping from Claude tool names to MCP tool names
95
96
  self._claude_to_mcp_tool_map: dict[str, str] = {}
@@ -98,14 +99,14 @@ class ClaudeAgent(MCPAgent):
98
99
  # Append Claude-specific instructions to the base system prompt
99
100
  claude_instructions = """
100
101
  You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
101
-
102
+
102
103
  When working on tasks:
103
104
  1. Be thorough and systematic in your approach
104
105
  2. Complete tasks autonomously without asking for confirmation
105
106
  3. Use available tools efficiently to accomplish your goals
106
107
  4. Verify your actions and ensure task completion
107
108
  5. Be precise and accurate in all operations
108
-
109
+
109
110
  Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
110
111
  """.strip() # noqa: E501
111
112
 
hud/agents/openai.py CHANGED
@@ -70,6 +70,7 @@ class OperatorAgent(MCPAgent):
70
70
 
71
71
  self.openai_client = model_client
72
72
  self.model = model
73
+ self.checkpoint_name = self.model
73
74
  self.environment = environment
74
75
 
75
76
  # State tracking for OpenAI's stateful API
@@ -84,7 +85,7 @@ class OperatorAgent(MCPAgent):
84
85
  except Exception as e:
85
86
  raise ValueError(f"OpenAI API key is invalid: {e}") from e
86
87
 
87
- self.model_name = "openai-" + self.model
88
+ self.model_name = "Operator"
88
89
 
89
90
  # Append OpenAI-specific instructions to the base system prompt
90
91
  openai_instructions = """
@@ -62,7 +62,8 @@ class GenericOpenAIChatAgent(MCPAgent):
62
62
  else:
63
63
  raise ValueError("Either openai_client or (api_key and base_url) must be provided")
64
64
 
65
- self.model_name = model_name
65
+ self.model_name = "GenericOpenAI"
66
+ self.checkpoint_name = model_name
66
67
  self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
67
68
  self.mcp_schemas = []
68
69
  self.hud_console = HUDConsole(logger=logger)
@@ -194,7 +195,7 @@ class GenericOpenAIChatAgent(MCPAgent):
194
195
  raise ValueError("openai_client is required for GenericOpenAIChatAgent")
195
196
  # default transport = OpenAI SDK
196
197
  return await self.oai.chat.completions.create(
197
- model=self.model_name,
198
+ model=self.checkpoint_name,
198
199
  messages=messages,
199
200
  tools=tools, # type: ignore ready ChatCompletionToolParam-shaped
200
201
  **extra,
@@ -89,7 +89,7 @@ class TestClaudeAgent:
89
89
  validate_api_key=False, # Skip validation in tests
90
90
  )
91
91
 
92
- assert agent.model_name == "claude-3-opus-20240229"
92
+ assert agent.model_name == "Claude"
93
93
  assert agent.max_tokens == 1000
94
94
  assert agent.anthropic_client == mock_model_client
95
95
 
@@ -103,7 +103,7 @@ class TestClaudeAgent:
103
103
  validate_api_key=False, # Skip validation in tests
104
104
  )
105
105
 
106
- assert agent.model_name == "claude-3-opus-20240229"
106
+ assert agent.model_name == "Claude"
107
107
  assert agent.anthropic_client is not None
108
108
 
109
109
  @pytest.mark.asyncio
@@ -50,7 +50,7 @@ class TestOperatorAgent:
50
50
  validate_api_key=False, # Skip validation in tests
51
51
  )
52
52
 
53
- assert agent.model_name == "openai-gpt-4"
53
+ assert agent.model_name == "Operator"
54
54
  assert agent.model == "gpt-4"
55
55
  assert agent.openai_client == mock_model_client
56
56
 
hud/agents/utils.py ADDED
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ from typing import TYPE_CHECKING
5
+
6
+ from hud.otel.context import (
7
+ _update_task_status_async,
8
+ get_current_task_run_id,
9
+ )
10
+
11
+ if TYPE_CHECKING:
12
+ from hud.datasets import Task
13
+
14
+
15
async def log_task_config_to_current_trace(task: Task) -> None:
    """Attach the dumped task configuration to the current trace's status metadata.

    The task is serialized with ``task.model_dump()`` and sent under the
    ``"task_config"`` key of ``extra_metadata`` together with a ``"running"``
    status and the task's id.

    This is best-effort: nothing is sent when there is no current task run id,
    and any exception raised along the way is suppressed.

    Args:
        task: The task whose configuration should be recorded.
    """
    with contextlib.suppress(Exception):
        run_id = get_current_task_run_id()
        if run_id:
            config_snapshot = task.model_dump()
            await _update_task_status_async(
                run_id,
                "running",
                task_id=task.id,
                extra_metadata={"task_config": config_snapshot},
            )
29
+
30
+
31
async def log_agent_metadata_to_status(
    model_name: str | None = None, checkpoint_name: str | None = None
) -> None:
    """Record the agent's model/checkpoint names on the current trace status.

    The values are sent under the ``"agent"`` key of ``extra_metadata`` with a
    ``"running"`` status. Only values that are not ``None`` are included.

    This is best-effort: nothing is sent when there is no current task run id
    or when both names are falsy, and any exception raised is suppressed.

    Args:
        model_name: Name of the agent/model family, if any.
        checkpoint_name: Name of the concrete model checkpoint, if any.
    """
    with contextlib.suppress(Exception):
        run_id = get_current_task_run_id()
        if run_id and (model_name or checkpoint_name):
            candidates = {
                "model_name": model_name,
                "checkpoint_name": checkpoint_name,
            }
            # Drop only None values; an empty string is still forwarded.
            agent_meta = {key: value for key, value in candidates.items() if value is not None}

            await _update_task_status_async(
                run_id,
                "running",
                extra_metadata={"agent": agent_meta},
            )
hud/cli/__init__.py CHANGED
@@ -382,6 +382,11 @@ def dev(
382
382
  "--watch",
383
383
  help="Additional directories to watch for changes (default: current directory)",
384
384
  ),
385
+ new: bool = typer.Option(
386
+ False,
387
+ "--new",
388
+ help="Show Cursor installation link for new server setup",
389
+ ),
385
390
  ) -> None:
386
391
  """🔥 Development mode - run MCP server with hot-reload.
387
392
 
@@ -422,6 +427,7 @@ def dev(
422
427
  watch,
423
428
  docker=docker,
424
429
  docker_args=docker_args,
430
+ new=new,
425
431
  )
426
432
 
427
433
 
@@ -740,7 +746,7 @@ def init(
740
746
  None,
741
747
  "--preset",
742
748
  "-p",
743
- help="Preset to use: blank, deep-research, browser. If omitted, you'll choose interactively.", # noqa: E501
749
+ help="Preset to use: blank, deep-research, browser, rubrics. If omitted, you'll choose interactively.", # noqa: E501
744
750
  ),
745
751
  directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
746
752
  force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
@@ -1079,6 +1085,51 @@ def rl(
1079
1085
  )
1080
1086
 
1081
1087
 
1088
@app.command()
def convert(
    tasks_file: str = typer.Argument(
        ..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
    ),
) -> None:
    """Convert local MCP task configs to remote (mcp.hud.so) format.

    This mirrors the implicit conversion flow used by 'hud rl' and writes a new
    remote_<name>.json next to the source file when needed.
    """
    # NOTE: the docstring above doubles as the CLI help text, so it is user-facing.
    from pathlib import Path

    from hud.utils.hud_console import HUDConsole

    console = HUDConsole()

    try:
        from .flows.tasks import convert_tasks_to_remote

        converted_path = convert_tasks_to_remote(tasks_file)

        # When the returned path is the input file itself, nothing was converted.
        try:
            same_file = Path(converted_path).resolve() == Path(tasks_file).resolve()
            if same_file:
                console.success("Tasks already reference remote MCP URLs. No conversion needed.")
                console.hint("You can run them directly with: hud eval <tasks_file> --full")
                return
        except Exception as compare_error:
            # The comparison is only informational; fall through to the success message.
            console.debug(f"Path comparison failed, continuing: {compare_error}")

        console.success(f"Converted tasks written to: {converted_path}")
        console.hint(
            "You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
        )
    except typer.Exit:
        # Let deliberate exits propagate untouched.
        raise
    except Exception as convert_error:
        console.error(f"Failed to convert tasks: {convert_error}")
        raise typer.Exit(1) from convert_error
1131
+
1132
+
1082
1133
  @app.command()
1083
1134
  def set(
1084
1135
  assignments: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
hud/cli/build.py CHANGED
@@ -5,6 +5,8 @@ from __future__ import annotations
5
5
  import asyncio
6
6
  import contextlib
7
7
  import hashlib
8
+ import json
9
+ import re
8
10
  import subprocess
9
11
  import time
10
12
  from datetime import UTC, datetime
@@ -50,6 +52,140 @@ def increment_version(version_str: str, increment_type: str = "patch") -> str:
50
52
  return f"{major}.{minor}.{patch + 1}"
51
53
 
52
54
 
55
# JSON files that commonly live next to an environment but never hold tasks.
_NON_TASK_JSON_NAMES = frozenset({"package.json", "tsconfig.json", "gcp.json"})


def _load_task_entries(task_file: Path) -> tuple[list[object], bool]:
    """Parse a task file into a list of entries.

    A ``.json`` file must contain a single JSON document; anything other than
    a top-level list yields no entries. A ``.jsonl`` file is parsed as a whole
    document first (so a JSON array stored under a ``.jsonl`` name keeps
    working) and otherwise as JSON Lines, one value per non-blank line.

    Args:
        task_file: File to read (decoded as UTF-8).

    Returns:
        ``(entries, is_jsonl)`` where ``is_jsonl`` tells the caller that the
        file must be written back in JSON Lines form.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            ``ValueError`` subclasses).
    """
    text = task_file.read_text(encoding="utf-8")

    if task_file.suffix == ".jsonl":
        try:
            document = json.loads(text)
        except json.JSONDecodeError:
            # Expected for real JSON Lines content with more than one line.
            document = None
        if isinstance(document, list):
            return document, False
        return [json.loads(line) for line in text.splitlines() if line.strip()], True

    document = json.loads(text)
    return (document if isinstance(document, list) else []), False


def _write_task_entries(task_file: Path, entries: list[object], is_jsonl: bool) -> None:
    """Write entries back in the same layout they were read from."""
    # Written as UTF-8 to match how the file is read.
    with open(task_file, "w", encoding="utf-8") as f:
        if is_jsonl:
            f.writelines(json.dumps(entry) + "\n" for entry in entries)
        else:
            json.dump(entries, f, indent=2)


def find_task_files_in_env(env_dir: Path) -> list[Path]:
    """Find all task files directly inside an environment directory.

    A task file is a ``.json`` file holding a list, or a ``.jsonl`` file
    holding one value per line, where at least one entry is a dict with an
    ``mcp_config`` key. Hidden files, ``package.json``, ``tsconfig.json``,
    ``gcp.json`` and ``*.lock.json`` files are never considered. Files that
    cannot be read or parsed are skipped silently.

    Args:
        env_dir: Environment directory to search (not recursive).

    Returns:
        Task file paths: ``.json`` files first, then ``.jsonl`` files, each
        group sorted so the result is deterministic.
    """
    task_files: list[Path] = []

    candidates = sorted(env_dir.glob("*.json")) + sorted(env_dir.glob("*.jsonl"))

    for candidate in candidates:
        name = candidate.name
        if name.startswith(".") or name in _NON_TASK_JSON_NAMES or name.endswith(".lock.json"):
            continue

        try:
            entries, _ = _load_task_entries(candidate)
        except (OSError, ValueError, RecursionError):
            # Unreadable, non-UTF-8, malformed or pathologically nested JSON:
            # not a task file as far as this scan is concerned.
            continue

        if any(isinstance(entry, dict) and "mcp_config" in entry for entry in entries):
            task_files.append(candidate)

    return task_files


def _retag_docker_args(
    args: list[object], base_name: str, old_version: str | None, new_version: str
) -> bool:
    """Point image references in docker run args at ``new_version``.

    An argument is an image reference when it equals ``base_name:latest``,
    ``base_name:<old_version>`` or ``base_name:<X.Y.Z>``. ``args`` is updated
    in place.

    Returns:
        True if at least one argument actually changed.
    """
    new_ref = f"{base_name}:{new_version}"
    known_refs = {f"{base_name}:latest"}
    if old_version:
        known_refs.add(f"{base_name}:{old_version}")
    semver_ref = re.compile(rf"{re.escape(base_name)}:\d+\.\d+\.\d+")

    changed = False
    for i, arg in enumerate(args):
        if not isinstance(arg, str):
            continue
        # fullmatch (rather than match + "$") so a trailing newline cannot slip through.
        if (arg in known_refs or semver_ref.fullmatch(arg)) and arg != new_ref:
            args[i] = new_ref
            changed = True
    return changed


def _retag_hud_image_header(headers: dict, base_name: str, new_version: str) -> bool:
    """Point the ``Mcp-Image`` header at ``new_version``, keeping any registry prefix.

    ``headers`` is updated in place.

    Returns:
        True if the header actually changed.
    """
    image_ref = headers.get("Mcp-Image")
    if not isinstance(image_ref, str) or ":" not in image_ref:
        return False

    # Split on the last colon so a prefix such as "org/name" stays intact.
    image_name, _tag = image_ref.rsplit(":", 1)
    if image_name != base_name and not image_name.endswith(f"/{base_name}"):
        return False

    new_ref = f"{image_name}:{new_version}"
    if new_ref == image_ref:
        return False
    headers["Mcp-Image"] = new_ref
    return True


def update_tasks_json_versions(
    env_dir: Path, base_name: str, old_version: str | None, new_version: str
) -> list[Path]:
    """Update image references in task files to use the new version.

    Every task file found by ``find_task_files_in_env`` is scanned. For each
    task with a dict ``mcp_config``, either the docker args under ``local`` or,
    failing that, the ``Mcp-Image`` header under ``hud`` is retagged. A file is
    rewritten only when a reference really changed, and in the layout it was
    read from (JSON array with ``indent=2``, or JSON Lines).

    Failures are reported per file through a console warning and never abort
    the remaining files.

    Args:
        env_dir: Environment directory
        base_name: Base image name (without version)
        old_version: Previous version (if any)
        new_version: New version to use

    Returns:
        List of task files that were rewritten
    """
    hud_console = HUDConsole()
    updated_files: list[Path] = []

    for task_file in find_task_files_in_env(env_dir):
        try:
            tasks, is_jsonl = _load_task_entries(task_file)

            modified = False
            for task in tasks:
                if not isinstance(task, dict):
                    continue
                mcp_config = task.get("mcp_config")
                if not isinstance(mcp_config, dict):
                    # A null/malformed config on one task must not abort the file.
                    continue

                local_config = mcp_config.get("local")
                hud_config = mcp_config.get("hud")

                if isinstance(local_config, dict):
                    # Local Docker format: image is one of the docker run args.
                    args = local_config.get("args")
                    if isinstance(args, list) and _retag_docker_args(
                        args, base_name, old_version, new_version
                    ):
                        modified = True
                elif isinstance(hud_config, dict):
                    # HUD API format (remote MCP): image lives in a header.
                    headers = hud_config.get("headers")
                    if isinstance(headers, dict) and _retag_hud_image_header(
                        headers, base_name, new_version
                    ):
                        modified = True

            if modified:
                _write_task_entries(task_file, tasks, is_jsonl)
                updated_files.append(task_file)
                hud_console.success(f"Updated {task_file.name} with version {new_version}")

        except Exception as e:
            # Best-effort per file: report and move on to the next one.
            hud_console.warning(f"Could not update {task_file.name}: {e}")

    return updated_files
187
+
188
+
53
189
  def get_existing_version(lock_path: Path) -> str | None:
54
190
  """Get the internal version from existing lock file if it exists."""
55
191
  if not lock_path.exists():
@@ -386,28 +522,24 @@ def build_environment(
386
522
  dockerfile_path = env_dir / "Dockerfile"
387
523
  required_env, optional_env = extract_env_vars_from_dockerfile(dockerfile_path)
388
524
 
389
- # Merge user-provided env vars with detected ones
390
- provided_env_vars: dict[str, str] = {}
391
- missing_required = []
392
- if env_vars:
393
- # Use placeholders in lock file for any provided values to avoid storing secrets
394
- provided_env_vars = {k: f"${{{k}}}" for k in env_vars}
395
- # Track which required vars are still missing
396
- missing_required = [e for e in required_env if e not in env_vars]
397
-
398
- # Show what env vars were provided
399
- hud_console.success(f"Using provided environment variables: {', '.join(env_vars.keys())}")
400
- else:
401
- missing_required = required_env[:]
525
+ # Show env vars detected from .env file
526
+ if env_from_file:
527
+ hud_console.info(
528
+ f"Detected environment variables from .env file: {', '.join(sorted(env_from_file.keys()))}" # noqa: E501
529
+ )
530
+
531
+ # Create a complete set of all required variables for warning
532
+ all_required_for_warning = set(required_env)
533
+ all_required_for_warning.update(env_from_file.keys())
534
+
535
+ # Find which ones are missing (not provided via -e flags)
536
+ all_missing = all_required_for_warning - set(env_vars.keys() if env_vars else [])
402
537
 
403
- # Warn about missing required variables
404
- if missing_required:
538
+ if all_missing:
405
539
  hud_console.warning(
406
- f"Missing required environment variables: {', '.join(missing_required)}"
407
- )
408
- hud_console.info(
409
- "These can be added to the lock file after build or provided with -e flags"
540
+ f"Environment variables not provided via -e flags: {', '.join(sorted(all_missing))}"
410
541
  )
542
+ hud_console.info("These will be added to the required list in the lock file")
411
543
 
412
544
  # Check for existing version and increment
413
545
  lock_path = env_dir / "hud.lock.yaml"
@@ -449,7 +581,13 @@ def build_environment(
449
581
  }
450
582
 
451
583
  # Add environment variables section if any exist
452
- if missing_required or optional_env or provided_env_vars:
584
+ # Include env vars from .env file as well
585
+ env_vars_from_file = set(env_from_file.keys()) if env_from_file else set()
586
+
587
+ # Check if we have any env vars to document
588
+ has_env_vars = bool(required_env or optional_env or env_vars or env_vars_from_file)
589
+
590
+ if has_env_vars:
453
591
  lock_content["environment"]["variables"] = {}
454
592
 
455
593
  # Add note about editing environment variables
@@ -458,10 +596,21 @@ def build_environment(
458
596
  "Provided variables will be used when running the environment."
459
597
  )
460
598
 
461
- if provided_env_vars:
462
- lock_content["environment"]["variables"]["provided"] = provided_env_vars
463
- if missing_required:
464
- lock_content["environment"]["variables"]["required"] = missing_required
599
+ # Combine all required variables: from Dockerfile, .env file, and provided vars
600
+ all_required = set(required_env)
601
+
602
+ # Add all env vars from .env file to required
603
+ all_required.update(env_vars_from_file)
604
+
605
+ # Add all provided env vars to required
606
+ if env_vars:
607
+ all_required.update(env_vars.keys())
608
+
609
+ # Remove any that are optional - they stay in optional
610
+ all_required = all_required - set(optional_env)
611
+
612
+ if all_required:
613
+ lock_content["environment"]["variables"]["required"] = sorted(list(all_required))
465
614
  if optional_env:
466
615
  lock_content["environment"]["variables"]["optional"] = optional_env
467
616
 
@@ -579,6 +728,17 @@ def build_environment(
579
728
  local_ref = lock_content.get("images", {}).get("local", version_tag)
580
729
  save_to_registry(lock_content, local_ref, verbose)
581
730
 
731
+ # Update tasks.json files with new version
732
+ hud_console.progress_message("Updating task files with new version...")
733
+ updated_task_files = update_tasks_json_versions(
734
+ env_dir, base_name, existing_version, new_version
735
+ )
736
+
737
+ if updated_task_files:
738
+ hud_console.success(f"Updated {len(updated_task_files)} task file(s)")
739
+ else:
740
+ hud_console.dim_info("No task files found or updated", value="")
741
+
582
742
  # Print summary
583
743
  hud_console.section_title("Build Complete")
584
744
 
@@ -602,7 +762,7 @@ def build_environment(
602
762
  hud_console.section_title("Next Steps")
603
763
  hud_console.info("Test locally:")
604
764
  hud_console.command_example("hud dev", "Hot-reload development")
605
- hud_console.command_example(f"hud run {latest_tag}", "Run the built image")
765
+ hud_console.command_example(f"hud run {version_tag}", "Run the built image")
606
766
  hud_console.info("")
607
767
  hud_console.info("Publish to registry:")
608
768
  hud_console.command_example("hud push", f"Push as {version_tag}")