hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/agents/base.py CHANGED
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal
11
11
 
12
12
  import mcp.types as types
13
13
 
14
+ from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
14
15
  from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
15
16
  from hud.utils.hud_console import HUDConsole
16
17
  from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
@@ -62,6 +63,7 @@ class MCPAgent(ABC):
62
63
  initial_screenshot: bool = True,
63
64
  # Misc
64
65
  model_name: str = "mcp-agent",
66
+ checkpoint_name: str | None = None,
65
67
  response_agent: ResponseAgent | None = None,
66
68
  auto_trace: bool = True,
67
69
  verbose: bool = False,
@@ -92,6 +94,7 @@ class MCPAgent(ABC):
92
94
  self._auto_created_client = False # Track if we created the client
93
95
 
94
96
  self.model_name = model_name
97
+ self.checkpoint_name = checkpoint_name
95
98
  self.console = HUDConsole(logger=logger)
96
99
 
97
100
  # Set verbose mode if requested
@@ -198,6 +201,8 @@ class MCPAgent(ABC):
198
201
  f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
199
202
  )
200
203
 
204
+ await log_agent_metadata_to_status(self.model_name, self.checkpoint_name)
205
+
201
206
  async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
202
207
  """
203
208
  Run the agent with the given prompt or task.
@@ -223,6 +228,9 @@ class MCPAgent(ABC):
223
228
 
224
229
  # Handle Task objects with full lifecycle
225
230
  if isinstance(prompt_or_task, Task):
231
+ # Log a compact summary of task config to the current trace (async)
232
+ await log_task_config_to_current_trace(prompt_or_task)
233
+
226
234
  return await self.run_task(prompt_or_task, max_steps)
227
235
 
228
236
  # Handle simple string prompts
hud/agents/claude.py CHANGED
@@ -89,7 +89,8 @@ class ClaudeAgent(MCPAgent):
89
89
  self.use_computer_beta = use_computer_beta
90
90
  self.hud_console = HUDConsole(logger=logger)
91
91
 
92
- self.model_name = self.model
92
+ self.model_name = "Claude"
93
+ self.checkpoint_name = self.model
93
94
 
94
95
  # Track mapping from Claude tool names to MCP tool names
95
96
  self._claude_to_mcp_tool_map: dict[str, str] = {}
@@ -98,14 +99,14 @@ class ClaudeAgent(MCPAgent):
98
99
  # Append Claude-specific instructions to the base system prompt
99
100
  claude_instructions = """
100
101
  You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
101
-
102
+
102
103
  When working on tasks:
103
104
  1. Be thorough and systematic in your approach
104
105
  2. Complete tasks autonomously without asking for confirmation
105
106
  3. Use available tools efficiently to accomplish your goals
106
107
  4. Verify your actions and ensure task completion
107
108
  5. Be precise and accurate in all operations
108
-
109
+
109
110
  Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
110
111
  """.strip() # noqa: E501
111
112
 
hud/agents/openai.py CHANGED
@@ -70,6 +70,7 @@ class OperatorAgent(MCPAgent):
70
70
 
71
71
  self.openai_client = model_client
72
72
  self.model = model
73
+ self.checkpoint_name = self.model
73
74
  self.environment = environment
74
75
 
75
76
  # State tracking for OpenAI's stateful API
@@ -84,7 +85,7 @@ class OperatorAgent(MCPAgent):
84
85
  except Exception as e:
85
86
  raise ValueError(f"OpenAI API key is invalid: {e}") from e
86
87
 
87
- self.model_name = "openai-" + self.model
88
+ self.model_name = "Operator"
88
89
 
89
90
  # Append OpenAI-specific instructions to the base system prompt
90
91
  openai_instructions = """
@@ -62,7 +62,8 @@ class GenericOpenAIChatAgent(MCPAgent):
62
62
  else:
63
63
  raise ValueError("Either openai_client or (api_key and base_url) must be provided")
64
64
 
65
- self.model_name = model_name
65
+ self.model_name = "GenericOpenAI"
66
+ self.checkpoint_name = model_name
66
67
  self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
67
68
  self.mcp_schemas = []
68
69
  self.hud_console = HUDConsole(logger=logger)
@@ -194,7 +195,7 @@ class GenericOpenAIChatAgent(MCPAgent):
194
195
  raise ValueError("openai_client is required for GenericOpenAIChatAgent")
195
196
  # default transport = OpenAI SDK
196
197
  return await self.oai.chat.completions.create(
197
- model=self.model_name,
198
+ model=self.checkpoint_name,
198
199
  messages=messages,
199
200
  tools=tools, # type: ignore ready ChatCompletionToolParam-shaped
200
201
  **extra,
@@ -89,7 +89,7 @@ class TestClaudeAgent:
89
89
  validate_api_key=False, # Skip validation in tests
90
90
  )
91
91
 
92
- assert agent.model_name == "claude-3-opus-20240229"
92
+ assert agent.model_name == "Claude"
93
93
  assert agent.max_tokens == 1000
94
94
  assert agent.anthropic_client == mock_model_client
95
95
 
@@ -103,7 +103,7 @@ class TestClaudeAgent:
103
103
  validate_api_key=False, # Skip validation in tests
104
104
  )
105
105
 
106
- assert agent.model_name == "claude-3-opus-20240229"
106
+ assert agent.model_name == "Claude"
107
107
  assert agent.anthropic_client is not None
108
108
 
109
109
  @pytest.mark.asyncio
@@ -50,7 +50,7 @@ class TestOperatorAgent:
50
50
  validate_api_key=False, # Skip validation in tests
51
51
  )
52
52
 
53
- assert agent.model_name == "openai-gpt-4"
53
+ assert agent.model_name == "Operator"
54
54
  assert agent.model == "gpt-4"
55
55
  assert agent.openai_client == mock_model_client
56
56
 
hud/agents/utils.py ADDED
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ from typing import TYPE_CHECKING
5
+
6
+ from hud.otel.context import (
7
+ _update_task_status_async,
8
+ get_current_task_run_id,
9
+ )
10
+
11
+ if TYPE_CHECKING:
12
+ from hud.datasets import Task
13
+
14
+
15
async def log_task_config_to_current_trace(task: Task) -> None:
    """Attach the dumped task configuration to the current trace's status metadata.

    The task is serialized with ``task.model_dump()`` and sent, together with
    ``task.id``, as a ``"running"`` status update for the current task run.

    This is best-effort telemetry: when there is no current task run id the
    function returns without doing anything, and any ``Exception`` raised while
    dumping the task or sending the update is suppressed.

    Args:
        task: The task whose configuration should be recorded.
    """
    with contextlib.suppress(Exception):
        run_id = get_current_task_run_id()
        if run_id:
            # Dump first so a serialization failure is swallowed before any
            # status update is attempted.
            metadata = {"task_config": task.model_dump()}

            await _update_task_status_async(
                run_id,
                "running",
                task_id=task.id,
                extra_metadata=metadata,
            )
29
+
30
+
31
async def log_agent_metadata_to_status(
    model_name: str | None = None, checkpoint_name: str | None = None
) -> None:
    """Attach agent metadata (model/checkpoint) to current trace status metadata.

    Sends a ``"running"`` status update for the current task run whose extra
    metadata holds an ``"agent"`` mapping with the ``model_name`` and/or
    ``checkpoint_name`` values that are not ``None``.

    This is best-effort telemetry: nothing is sent when there is no current
    task run id or when both values are falsy, and any ``Exception`` raised
    along the way is suppressed.

    Args:
        model_name: Agent/model label to record, if any.
        checkpoint_name: Checkpoint label to record, if any.
    """
    with contextlib.suppress(Exception):
        run_id = get_current_task_run_id()
        if run_id and (model_name or checkpoint_name):
            # The guard above is truthiness-based, but inclusion is based on
            # ``is not None`` so an empty string is still forwarded as-is.
            fields = (
                ("model_name", model_name),
                ("checkpoint_name", checkpoint_name),
            )
            agent_info = {key: value for key, value in fields if value is not None}

            await _update_task_status_async(
                run_id,
                "running",
                extra_metadata={"agent": agent_info},
            )
hud/cli/__init__.py CHANGED
@@ -12,6 +12,8 @@ from rich.console import Console
12
12
  from rich.panel import Panel
13
13
  from rich.table import Table
14
14
 
15
+ from hud.types import AgentType
16
+
15
17
  from . import list_func as list_module
16
18
  from .analyze import (
17
19
  analyze_environment,
@@ -380,6 +382,11 @@ def dev(
380
382
  "--watch",
381
383
  help="Additional directories to watch for changes (default: current directory)",
382
384
  ),
385
+ new: bool = typer.Option(
386
+ False,
387
+ "--new",
388
+ help="Show Cursor installation link for new server setup",
389
+ ),
383
390
  ) -> None:
384
391
  """🔥 Development mode - run MCP server with hot-reload.
385
392
 
@@ -420,6 +427,7 @@ def dev(
420
427
  watch,
421
428
  docker=docker,
422
429
  docker_args=docker_args,
430
+ new=new,
423
431
  )
424
432
 
425
433
 
@@ -847,7 +855,7 @@ def eval(
847
855
  hud_console = HUDConsole()
848
856
 
849
857
  if integration_test:
850
- agent = "integration_test"
858
+ agent = AgentType.INTEGRATION_TEST
851
859
 
852
860
  # If no source provided, reuse RL helper to find a tasks file interactively
853
861
  if source is None:
@@ -894,17 +902,17 @@ def eval(
894
902
  # Add standard agent choices
895
903
  choices.extend(
896
904
  [
897
- {"name": "Claude 4 Sonnet", "value": "claude"},
898
- {"name": "OpenAI Computer Use", "value": "openai"},
899
- {"name": "vLLM (Local Server)", "value": "vllm"},
900
- {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
905
+ {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
906
+ {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
907
+ {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
908
+ {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
901
909
  ]
902
910
  )
903
911
 
904
912
  agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
905
913
 
906
914
  # Handle HUD model selection
907
- if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
915
+ if agent and agent not in [e.value for e in AgentType]:
908
916
  # Find remote model name
909
917
  model = agent
910
918
  if not vllm_base_url:
@@ -921,20 +929,23 @@ def eval(
921
929
  hud_console.error(f"Model {model} not found")
922
930
  raise typer.Exit(1)
923
931
  model = base_model
924
- agent = "vllm" # Use vLLM backend for HUD models
932
+ agent = AgentType.VLLM # Use vLLM backend for HUD models
925
933
  hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
926
934
 
927
935
  # Validate agent choice
928
- valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
936
+ valid_agents = [e.value for e in AgentType]
929
937
  if agent not in valid_agents:
930
938
  hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
931
939
  raise typer.Exit(1)
932
940
 
941
+ # Type narrowing: agent is now guaranteed to be an AgentType value after validation
942
+ agent = AgentType(agent)
943
+
933
944
  # Run the command
934
945
  eval_command(
935
946
  source=source,
936
947
  full=full,
937
- agent=agent, # type: ignore
948
+ agent=agent,
938
949
  model=model,
939
950
  allowed_tools=allowed_tools,
940
951
  max_concurrent=max_concurrent,
@@ -1074,6 +1085,51 @@ def rl(
1074
1085
  )
1075
1086
 
1076
1087
 
1088
@app.command()
def convert(
    tasks_file: str = typer.Argument(
        ..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
    ),
) -> None:
    """Convert local MCP task configs to remote (mcp.hud.so) format.

    This mirrors the implicit conversion flow used by 'hud rl' and writes a new
    remote_<name>.json next to the source file when needed.
    """
    # NOTE: the docstring above doubles as the command's CLI help text, so it
    # is intentionally left untouched; implementation notes live in comments.
    from pathlib import Path

    from hud.utils.hud_console import HUDConsole

    console = HUDConsole()

    try:
        # Imported inside the try so that an import failure is reported through
        # the same "Failed to convert tasks" path as a conversion failure.
        from .flows.tasks import convert_tasks_to_remote

        converted_path = convert_tasks_to_remote(tasks_file)

        # When the helper hands back the input path itself, nothing was
        # converted: tell the user and stop here.
        try:
            source_resolved = Path(tasks_file).resolve()
            if Path(converted_path).resolve() == source_resolved:
                console.success("Tasks already reference remote MCP URLs. No conversion needed.")
                console.hint("You can run them directly with: hud eval <tasks_file> --full")
                return
        except Exception as compare_error:
            # Best effort only: fall through to the regular success message.
            console.debug(f"Path comparison failed, continuing: {compare_error}")

        console.success(f"Converted tasks written to: {converted_path}")
        console.hint(
            "You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
        )
    except typer.Exit:
        # Let deliberate exits raised further down pass through unchanged.
        raise
    except Exception as error:
        console.error(f"Failed to convert tasks: {error}")
        raise typer.Exit(1) from error
1131
+
1132
+
1077
1133
  @app.command()
1078
1134
  def set(
1079
1135
  assignments: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
hud/cli/build.py CHANGED
@@ -5,6 +5,8 @@ from __future__ import annotations
5
5
  import asyncio
6
6
  import contextlib
7
7
  import hashlib
8
+ import json
9
+ import re
8
10
  import subprocess
9
11
  import time
10
12
  from datetime import UTC, datetime
@@ -50,6 +52,140 @@ def increment_version(version_str: str, increment_type: str = "patch") -> str:
50
52
  return f"{major}.{minor}.{patch + 1}"
51
53
 
52
54
 
55
def find_task_files_in_env(env_dir: Path) -> list[Path]:
    """Find all task files in an environment directory.

    A file counts as a task file when it sits directly inside ``env_dir``,
    has a ``.json`` or ``.jsonl`` suffix, is not a hidden/config/lock file,
    and parses as a single JSON document that is a list containing at least
    one dict with an ``"mcp_config"`` key.

    Note:
        Files are parsed with ``json.load``, which only accepts one JSON
        document per file. A ``.jsonl`` file holding several records on
        separate lines therefore fails to parse and is skipped.

    Args:
        env_dir: Environment directory to search (not searched recursively).

    Returns:
        List of task file paths; ``.json`` matches precede ``.jsonl`` matches.
    """
    # Well-known JSON files that are never task definitions.
    excluded_names = {"package.json", "tsconfig.json", "gcp.json"}

    candidates = [*env_dir.glob("*.json"), *env_dir.glob("*.jsonl")]
    task_files: list[Path] = []

    for file in candidates:
        name = file.name
        # Skip hidden files, config files, and lock files.
        if name.startswith(".") or name in excluded_names or name.endswith(".lock.json"):
            continue

        # Only reading/parsing can fail; unreadable or malformed files are
        # simply not task files, so the scan stays best-effort.
        try:
            with open(file, encoding="utf-8") as f:
                content = json.load(f)
        except Exception:  # noqa: S112
            continue

        # It's a task file if it's a list with at least one mcp_config entry
        # (an empty list never satisfies any()).
        if isinstance(content, list) and any(
            isinstance(item, dict) and "mcp_config" in item for item in content
        ):
            task_files.append(file)

    return task_files
100
+
101
+
102
def _retag_docker_image_args(
    args: list[object], base_name: str, old_version: str | None, new_version: str
) -> bool:
    """Rewrite docker-run image arguments for ``base_name`` to ``new_version`` in place.

    Returns True only if at least one argument actually changed.
    """
    target = f"{base_name}:{new_version}"
    known_refs = {f"{base_name}:latest"}
    if old_version:
        known_refs.add(f"{base_name}:{old_version}")
    # Any other X.Y.Z tag of the same image is also bumped.
    versioned_ref = re.compile(rf"^{re.escape(base_name)}:\d+\.\d+\.\d+$")

    changed = False
    for i, arg in enumerate(args):
        # Already pointing at the new version: nothing to do.
        if not isinstance(arg, str) or arg == target:
            continue
        if arg in known_refs or versioned_ref.match(arg):
            args[i] = target
            changed = True
    return changed


def _retag_mcp_image_header(headers: dict, base_name: str, new_version: str) -> bool:
    """Rewrite the ``Mcp-Image`` header tag to ``new_version`` in place.

    Returns True only if the header actually changed.
    """
    image_ref = headers.get("Mcp-Image")
    if not isinstance(image_ref, str) or ":" not in image_ref:
        return False

    # Split into image name and tag; the name may carry a registry/org prefix.
    image_name, _ = image_ref.rsplit(":", 1)
    if image_name != base_name and not image_name.endswith(f"/{base_name}"):
        return False

    # Preserve the full image path, only swapping the tag.
    target = f"{image_name}:{new_version}"
    if image_ref == target:
        return False
    headers["Mcp-Image"] = target
    return True


def update_tasks_json_versions(
    env_dir: Path, base_name: str, old_version: str | None, new_version: str
) -> list[Path]:
    """Update image references in task files to use the new version.

    For every task file found in ``env_dir``, each task's ``mcp_config`` is
    inspected:

    * ``mcp_config["local"]["args"]`` (local Docker format): arguments equal to
      ``base_name:latest``, ``base_name:<old_version>`` or ``base_name:X.Y.Z``
      are replaced by ``base_name:<new_version>``.
    * otherwise ``mcp_config["hud"]["headers"]["Mcp-Image"]`` (remote format):
      the tag is replaced when the image name is ``base_name`` or ends with
      ``/base_name``.

    Tasks whose ``mcp_config`` is not a dict are skipped. A file is rewritten
    (UTF-8, 2-space indent) only when at least one reference actually changed.
    Failures on one file are reported as a warning and do not stop the others.

    Args:
        env_dir: Environment directory
        base_name: Base image name (without version)
        old_version: Previous version (if any)
        new_version: New version to use

    Returns:
        List of updated task files
    """
    hud_console = HUDConsole()
    updated_files: list[Path] = []

    for task_file in find_task_files_in_env(env_dir):
        try:
            with open(task_file, encoding="utf-8") as f:
                tasks = json.load(f)
            if not isinstance(tasks, list):
                continue

            modified = False

            for task in tasks:
                if not isinstance(task, dict):
                    continue
                mcp_config = task.get("mcp_config")
                # A malformed (non-dict) config must not abort the whole file.
                if not isinstance(mcp_config, dict):
                    continue

                local_config = mcp_config.get("local")
                hud_config = mcp_config.get("hud")

                if isinstance(local_config, dict):
                    # Local Docker format: image reference lives in the run args.
                    args = local_config.get("args")
                    if isinstance(args, list) and _retag_docker_image_args(
                        args, base_name, old_version, new_version
                    ):
                        modified = True
                elif isinstance(hud_config, dict):
                    # HUD API format (remote MCP): image reference lives in a header.
                    headers = hud_config.get("headers")
                    if isinstance(headers, dict) and _retag_mcp_image_header(
                        headers, base_name, new_version
                    ):
                        modified = True

            # Save the file if modified, using the same encoding it was read with.
            if modified:
                with open(task_file, "w", encoding="utf-8") as f:
                    json.dump(tasks, f, indent=2)
                updated_files.append(task_file)
                hud_console.success(f"Updated {task_file.name} with version {new_version}")

        except Exception as e:
            hud_console.warning(f"Could not update {task_file.name}: {e}")

    return updated_files
187
+
188
+
53
189
  def get_existing_version(lock_path: Path) -> str | None:
54
190
  """Get the internal version from existing lock file if it exists."""
55
191
  if not lock_path.exists():
@@ -386,28 +522,24 @@ def build_environment(
386
522
  dockerfile_path = env_dir / "Dockerfile"
387
523
  required_env, optional_env = extract_env_vars_from_dockerfile(dockerfile_path)
388
524
 
389
- # Merge user-provided env vars with detected ones
390
- provided_env_vars: dict[str, str] = {}
391
- missing_required = []
392
- if env_vars:
393
- # Use placeholders in lock file for any provided values to avoid storing secrets
394
- provided_env_vars = {k: f"${{{k}}}" for k in env_vars}
395
- # Track which required vars are still missing
396
- missing_required = [e for e in required_env if e not in env_vars]
397
-
398
- # Show what env vars were provided
399
- hud_console.success(f"Using provided environment variables: {', '.join(env_vars.keys())}")
400
- else:
401
- missing_required = required_env[:]
525
+ # Show env vars detected from .env file
526
+ if env_from_file:
527
+ hud_console.info(
528
+ f"Detected environment variables from .env file: {', '.join(sorted(env_from_file.keys()))}" # noqa: E501
529
+ )
530
+
531
+ # Create a complete set of all required variables for warning
532
+ all_required_for_warning = set(required_env)
533
+ all_required_for_warning.update(env_from_file.keys())
534
+
535
+ # Find which ones are missing (not provided via -e flags)
536
+ all_missing = all_required_for_warning - set(env_vars.keys() if env_vars else [])
402
537
 
403
- # Warn about missing required variables
404
- if missing_required:
538
+ if all_missing:
405
539
  hud_console.warning(
406
- f"Missing required environment variables: {', '.join(missing_required)}"
407
- )
408
- hud_console.info(
409
- "These can be added to the lock file after build or provided with -e flags"
540
+ f"Environment variables not provided via -e flags: {', '.join(sorted(all_missing))}"
410
541
  )
542
+ hud_console.info("These will be added to the required list in the lock file")
411
543
 
412
544
  # Check for existing version and increment
413
545
  lock_path = env_dir / "hud.lock.yaml"
@@ -449,7 +581,13 @@ def build_environment(
449
581
  }
450
582
 
451
583
  # Add environment variables section if any exist
452
- if missing_required or optional_env or provided_env_vars:
584
+ # Include env vars from .env file as well
585
+ env_vars_from_file = set(env_from_file.keys()) if env_from_file else set()
586
+
587
+ # Check if we have any env vars to document
588
+ has_env_vars = bool(required_env or optional_env or env_vars or env_vars_from_file)
589
+
590
+ if has_env_vars:
453
591
  lock_content["environment"]["variables"] = {}
454
592
 
455
593
  # Add note about editing environment variables
@@ -458,10 +596,21 @@ def build_environment(
458
596
  "Provided variables will be used when running the environment."
459
597
  )
460
598
 
461
- if provided_env_vars:
462
- lock_content["environment"]["variables"]["provided"] = provided_env_vars
463
- if missing_required:
464
- lock_content["environment"]["variables"]["required"] = missing_required
599
+ # Combine all required variables: from Dockerfile, .env file, and provided vars
600
+ all_required = set(required_env)
601
+
602
+ # Add all env vars from .env file to required
603
+ all_required.update(env_vars_from_file)
604
+
605
+ # Add all provided env vars to required
606
+ if env_vars:
607
+ all_required.update(env_vars.keys())
608
+
609
+ # Remove any that are optional - they stay in optional
610
+ all_required = all_required - set(optional_env)
611
+
612
+ if all_required:
613
+ lock_content["environment"]["variables"]["required"] = sorted(list(all_required))
465
614
  if optional_env:
466
615
  lock_content["environment"]["variables"]["optional"] = optional_env
467
616
 
@@ -579,6 +728,17 @@ def build_environment(
579
728
  local_ref = lock_content.get("images", {}).get("local", version_tag)
580
729
  save_to_registry(lock_content, local_ref, verbose)
581
730
 
731
+ # Update tasks.json files with new version
732
+ hud_console.progress_message("Updating task files with new version...")
733
+ updated_task_files = update_tasks_json_versions(
734
+ env_dir, base_name, existing_version, new_version
735
+ )
736
+
737
+ if updated_task_files:
738
+ hud_console.success(f"Updated {len(updated_task_files)} task file(s)")
739
+ else:
740
+ hud_console.dim_info("No task files found or updated", value="")
741
+
582
742
  # Print summary
583
743
  hud_console.section_title("Build Complete")
584
744
 
@@ -602,7 +762,7 @@ def build_environment(
602
762
  hud_console.section_title("Next Steps")
603
763
  hud_console.info("Test locally:")
604
764
  hud_console.command_example("hud dev", "Hot-reload development")
605
- hud_console.command_example(f"hud run {latest_tag}", "Run the built image")
765
+ hud_console.command_example(f"hud run {version_tag}", "Run the built image")
606
766
  hud_console.info("")
607
767
  hud_console.info("Publish to registry:")
608
768
  hud_console.command_example("hud push", f"Push as {version_tag}")