hud-python 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/agents/claude.py CHANGED
@@ -6,7 +6,7 @@ import copy
6
6
  import logging
7
7
  from typing import TYPE_CHECKING, Any, ClassVar, cast
8
8
 
9
- from anthropic import AsyncAnthropic, BadRequestError
9
+ from anthropic import Anthropic, AsyncAnthropic, BadRequestError
10
10
  from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
11
 
12
12
  import hud
@@ -54,6 +54,7 @@ class ClaudeAgent(MCPAgent):
54
54
  model: str = "claude-sonnet-4-20250514",
55
55
  max_tokens: int = 4096,
56
56
  use_computer_beta: bool = True,
57
+ validate_api_key: bool = True,
57
58
  **kwargs: Any,
58
59
  ) -> None:
59
60
  """
@@ -75,6 +76,13 @@ class ClaudeAgent(MCPAgent):
75
76
  raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
76
77
  model_client = AsyncAnthropic(api_key=api_key)
77
78
 
79
+ # validate api key if requested
80
+ if validate_api_key:
81
+ try:
82
+ Anthropic(api_key=model_client.api_key).models.list()
83
+ except Exception as e:
84
+ raise ValueError(f"Anthropic API key is invalid: {e}") from e
85
+
78
86
  self.anthropic_client = model_client
79
87
  self.model = model
80
88
  self.max_tokens = max_tokens
hud/agents/openai.py CHANGED
@@ -6,7 +6,7 @@ import logging
6
6
  from typing import Any, ClassVar, Literal
7
7
 
8
8
  import mcp.types as types
9
- from openai import AsyncOpenAI
9
+ from openai import AsyncOpenAI, OpenAI
10
10
  from openai.types.responses import (
11
11
  ResponseComputerToolCall,
12
12
  ResponseInputMessageContentListParam,
@@ -45,6 +45,7 @@ class OperatorAgent(MCPAgent):
45
45
  model_client: AsyncOpenAI | None = None,
46
46
  model: str = "computer-use-preview",
47
47
  environment: Literal["windows", "mac", "linux", "browser"] = "linux",
48
+ validate_api_key: bool = True,
48
49
  **kwargs: Any,
49
50
  ) -> None:
50
51
  """
@@ -76,6 +77,13 @@ class OperatorAgent(MCPAgent):
76
77
  self.pending_call_id: str | None = None
77
78
  self.pending_safety_checks: list[Any] = []
78
79
 
80
+ # validate api key if requested
81
+ if validate_api_key:
82
+ try:
83
+ OpenAI(api_key=self.openai_client.api_key).models.list()
84
+ except Exception as e:
85
+ raise ValueError(f"OpenAI API key is invalid: {e}") from e
86
+
79
87
  self.model_name = "openai-" + self.model
80
88
 
81
89
  # Append OpenAI-specific instructions to the base system prompt
@@ -86,6 +86,7 @@ class TestClaudeAgent:
86
86
  model_client=mock_model_client,
87
87
  model="claude-3-opus-20240229",
88
88
  max_tokens=1000,
89
+ validate_api_key=False, # Skip validation in tests
89
90
  )
90
91
 
91
92
  assert agent.model_name == "claude-3-opus-20240229"
@@ -93,10 +94,14 @@ class TestClaudeAgent:
93
94
  assert agent.anthropic_client == mock_model_client
94
95
 
95
96
  @pytest.mark.asyncio
96
- async def test_init_without_model_client(self, mock_mcp_client):
97
+ async def test_init_without_model_client(self, mock_mcp_client, mock_anthropic):
97
98
  """Test agent initialization without model client."""
98
99
  with patch("hud.settings.settings.anthropic_api_key", "test_key"):
99
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model="claude-3-opus-20240229")
100
+ agent = ClaudeAgent(
101
+ mcp_client=mock_mcp_client,
102
+ model="claude-3-opus-20240229",
103
+ validate_api_key=False, # Skip validation in tests
104
+ )
100
105
 
101
106
  assert agent.model_name == "claude-3-opus-20240229"
102
107
  assert agent.anthropic_client is not None
@@ -105,7 +110,11 @@ class TestClaudeAgent:
105
110
  async def test_format_blocks(self, mock_mcp_client):
106
111
  """Test formatting content blocks into Claude messages."""
107
112
  mock_model_client = MagicMock()
108
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
113
+ agent = ClaudeAgent(
114
+ mcp_client=mock_mcp_client,
115
+ model_client=mock_model_client,
116
+ validate_api_key=False, # Skip validation in tests
117
+ )
109
118
 
110
119
  # Test with text only
111
120
  text_blocks: list[types.ContentBlock] = [
@@ -141,7 +150,11 @@ class TestClaudeAgent:
141
150
  async def test_format_tool_results_method(self, mock_mcp_client):
142
151
  """Test the agent's format_tool_results method."""
143
152
  mock_model_client = MagicMock()
144
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
153
+ agent = ClaudeAgent(
154
+ mcp_client=mock_mcp_client,
155
+ model_client=mock_model_client,
156
+ validate_api_key=False, # Skip validation in tests
157
+ )
145
158
 
146
159
  tool_calls = [
147
160
  MCPToolCall(name="test_tool", arguments={}, id="id1"),
@@ -171,7 +184,11 @@ class TestClaudeAgent:
171
184
  """Test getting model response from Claude API."""
172
185
  # Disable telemetry for this test to avoid backend configuration issues
173
186
  with patch("hud.settings.settings.telemetry_enabled", False):
174
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
187
+ agent = ClaudeAgent(
188
+ mcp_client=mock_mcp_client,
189
+ model_client=mock_anthropic,
190
+ validate_api_key=False, # Skip validation in tests
191
+ )
175
192
 
176
193
  # Mock the API response
177
194
  mock_response = MagicMock()
@@ -215,7 +232,11 @@ class TestClaudeAgent:
215
232
  """Test getting text-only response."""
216
233
  # Disable telemetry for this test to avoid backend configuration issues
217
234
  with patch("hud.settings.settings.telemetry_enabled", False):
218
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
235
+ agent = ClaudeAgent(
236
+ mcp_client=mock_mcp_client,
237
+ model_client=mock_anthropic,
238
+ validate_api_key=False, # Skip validation in tests
239
+ )
219
240
 
220
241
  mock_response = MagicMock()
221
242
  # Create text block
@@ -242,7 +263,11 @@ class TestClaudeAgent:
242
263
  """Test handling API errors."""
243
264
  # Disable telemetry for this test to avoid backend configuration issues
244
265
  with patch("hud.settings.settings.telemetry_enabled", False):
245
- agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
266
+ agent = ClaudeAgent(
267
+ mcp_client=mock_mcp_client,
268
+ model_client=mock_anthropic,
269
+ validate_api_key=False, # Skip validation in tests
270
+ )
246
271
 
247
272
  # Mock API error
248
273
  mock_anthropic.beta.messages.create = AsyncMock(
@@ -44,7 +44,10 @@ class TestOperatorAgent:
44
44
  """Test agent initialization."""
45
45
  mock_model_client = MagicMock()
46
46
  agent = OperatorAgent(
47
- mcp_client=mock_mcp_client, model_client=mock_model_client, model="gpt-4"
47
+ mcp_client=mock_mcp_client,
48
+ model_client=mock_model_client,
49
+ model="gpt-4",
50
+ validate_api_key=False, # Skip validation in tests
48
51
  )
49
52
 
50
53
  assert agent.model_name == "openai-gpt-4"
@@ -55,7 +58,11 @@ class TestOperatorAgent:
55
58
  async def test_format_blocks(self, mock_mcp_client):
56
59
  """Test formatting content blocks."""
57
60
  mock_model_client = MagicMock()
58
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
61
+ agent = OperatorAgent(
62
+ mcp_client=mock_mcp_client,
63
+ model_client=mock_model_client,
64
+ validate_api_key=False, # Skip validation in tests
65
+ )
59
66
 
60
67
  # Test with text blocks
61
68
  blocks: list[types.ContentBlock] = [
@@ -85,7 +92,11 @@ class TestOperatorAgent:
85
92
  @pytest.mark.asyncio
86
93
  async def test_format_tool_results(self, mock_mcp_client, mock_openai):
87
94
  """Test formatting tool results."""
88
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
95
+ agent = OperatorAgent(
96
+ mcp_client=mock_mcp_client,
97
+ model_client=mock_openai,
98
+ validate_api_key=False, # Skip validation in tests
99
+ )
89
100
 
90
101
  tool_calls = [
91
102
  MCPToolCall(name="test_tool", arguments={}, id="call_123"), # type: ignore
@@ -111,7 +122,11 @@ class TestOperatorAgent:
111
122
  @pytest.mark.asyncio
112
123
  async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
113
124
  """Test formatting tool results with errors."""
114
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
125
+ agent = OperatorAgent(
126
+ mcp_client=mock_mcp_client,
127
+ model_client=mock_openai,
128
+ validate_api_key=False, # Skip validation in tests
129
+ )
115
130
 
116
131
  tool_calls = [
117
132
  MCPToolCall(name="failing_tool", arguments={}, id="call_error"), # type: ignore
@@ -131,7 +146,11 @@ class TestOperatorAgent:
131
146
  @pytest.mark.asyncio
132
147
  async def test_get_model_response(self, mock_mcp_client, mock_openai):
133
148
  """Test getting model response from OpenAI API."""
134
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
149
+ agent = OperatorAgent(
150
+ mcp_client=mock_mcp_client,
151
+ model_client=mock_openai,
152
+ validate_api_key=False, # Skip validation in tests
153
+ )
135
154
 
136
155
  # Set up available tools so agent doesn't return "No computer use tools available"
137
156
  agent._available_tools = [
@@ -162,7 +181,11 @@ class TestOperatorAgent:
162
181
  @pytest.mark.asyncio
163
182
  async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
164
183
  """Test handling empty response from API."""
165
- agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
184
+ agent = OperatorAgent(
185
+ mcp_client=mock_mcp_client,
186
+ model_client=mock_openai,
187
+ validate_api_key=False, # Skip validation in tests
188
+ )
166
189
 
167
190
  # Set up available tools
168
191
  agent._available_tools = [
hud/cli/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import contextlib
6
7
  import json
7
8
  import sys
8
9
  from pathlib import Path
@@ -28,6 +29,7 @@ from .init import create_environment
28
29
  from .pull import pull_command
29
30
  from .push import push_command
30
31
  from .remove import remove_command
32
+ from .utils.config import set_env_values
31
33
  from .utils.cursor import get_cursor_config_path, list_cursor_servers, parse_cursor_config
32
34
  from .utils.logging import CaptureLogger
33
35
 
@@ -116,7 +118,9 @@ def analyze(
116
118
  image, *docker_args = params
117
119
  if live or docker_args: # If docker args provided, assume live mode
118
120
  # Build Docker command from image and args
119
- docker_cmd = ["docker", "run", "--rm", "-i", *docker_args, image]
121
+ from .utils.docker import build_run_command
122
+
123
+ docker_cmd = build_run_command(image, docker_args)
120
124
  asyncio.run(analyze_environment(docker_cmd, output_format, verbose))
121
125
  else:
122
126
  # Fast mode - analyze from metadata
@@ -239,11 +243,15 @@ def debug(
239
243
  raise typer.Exit(1)
240
244
 
241
245
  # Build Docker command
242
- command = ["docker", "run", "--rm", "-i", *docker_args, image_name]
246
+ from .utils.docker import build_run_command
247
+
248
+ command = build_run_command(image_name, docker_args)
243
249
  else:
244
250
  # Assume it's an image name
245
251
  image = first_param
246
- command = ["docker", "run", "--rm", "-i", *docker_args, image]
252
+ from .utils.docker import build_run_command
253
+
254
+ command = build_run_command(image, docker_args)
247
255
  else:
248
256
  console.print(
249
257
  "[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
@@ -370,12 +378,10 @@ def dev(
370
378
  False, "--interactive", help="Launch interactive testing mode (HTTP mode only)"
371
379
  ),
372
380
  ) -> None:
373
- """🔥 Development mode with hot-reload.
374
-
375
- Runs your MCP environment in Docker with automatic restart on file changes.
381
+ """🔥 Development mode - interactive MCP environment.
376
382
 
377
- The container's last command (typically the MCP server) will be wrapped
378
- with watchfiles for hot-reload functionality.
383
+ Runs your MCP environment in Docker with mounted source for development.
384
+ The container's CMD determines reload behavior.
379
385
 
380
386
  Examples:
381
387
  hud dev # Auto-detect in current directory
@@ -388,13 +394,12 @@ def dev(
388
394
  hud dev . --inspector # Launch MCP Inspector (HTTP mode only)
389
395
  hud dev . --interactive # Launch interactive testing mode (HTTP mode only)
390
396
  hud dev . --no-logs # Disable Docker log streaming
391
- hud dev . --full-reload # Restart entire container on file changes (instead of just server)
392
397
 
393
398
  # With Docker arguments (after all options):
394
399
  hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
395
400
  hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
396
401
  hud dev . --build -e DEBUG=true --memory 2g
397
- """ # noqa: E501
402
+ """
398
403
  # Parse directory and Docker arguments
399
404
  if params:
400
405
  directory = params[0]
@@ -424,7 +429,7 @@ def dev(
424
429
  def run(
425
430
  params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
426
431
  None,
427
- help="Docker image followed by optional arguments (e.g., 'hud-image:latest -e KEY=value')",
432
+ help="Python file/module/package or Docker image followed by optional arguments",
428
433
  ),
429
434
  local: bool = typer.Option(
430
435
  False,
@@ -474,32 +479,152 @@ def run(
474
479
  "--interactive",
475
480
  help="Launch interactive testing mode (HTTP transport only)",
476
481
  ),
482
+ reload: bool = typer.Option(
483
+ False,
484
+ "--reload",
485
+ help="Enable auto-reload on file changes (local Python files only)",
486
+ ),
487
+ watch: list[str] = typer.Option( # noqa: B008
488
+ None,
489
+ "--watch",
490
+ help="Directories to watch for changes (can be used multiple times). Defaults to current directory.", # noqa: E501
491
+ ),
492
+ cmd: str | None = typer.Option(
493
+ None,
494
+ "--cmd",
495
+ help="Command to run as MCP server (e.g., 'python -m controller')",
496
+ ),
477
497
  ) -> None:
478
- """🚀 Run MCP server locally or remotely.
498
+ """🚀 Run MCP server.
479
499
 
480
- By default, runs remotely via mcp.hud.so. Use --local for Docker.
500
+ Modes:
501
+ - Python (decorator-based): pass a dotted module path. Example: hud run controller
502
+ The module is imported, decorators register implicitly, and the server runs.
503
+ Use --reload to watch the module/package directory.
481
504
 
482
- Remote Examples:
483
- hud run hud-text-2048:latest
484
- hud run my-server:v1 -e API_KEY=xxx -h Run-Id:abc123
485
- hud run my-server:v1 --transport http --port 9000
505
+ - Command: use --cmd to run any command as an MCP server. Example: hud run --cmd "python -m controller"
506
+ Works with Docker, binaries, or any executable. Supports --reload.
486
507
 
487
- Local Examples:
488
- hud run --local hud-text-2048:latest
489
- hud run --local my-server:v1 -e API_KEY=xxx
490
- hud run --local my-server:v1 --transport http
491
-
492
- Interactive Testing (local only):
493
- hud run --local --interactive --transport http hud-text-2048:latest
494
- hud run --local --interactive --transport http --port 9000 my-server:v1
495
- """
496
- if not params:
497
- typer.echo("❌ Docker image is required")
508
+ - Docker image: pass a Docker image name (optionally with --local to run locally).
509
+ """ # noqa: E501
510
+ if not params and not cmd:
511
+ typer.echo("❌ Dotted module path, Docker image, or --cmd is required")
498
512
  raise typer.Exit(1)
499
513
 
500
- # Parse image and args
501
- image = params[0]
502
- docker_args = params[1:] if len(params) > 1 else []
514
+ # Handle --cmd mode
515
+ if cmd:
516
+ import asyncio
517
+
518
+ from .utils.package_runner import run_package_as_mcp
519
+
520
+ asyncio.run(
521
+ run_package_as_mcp(
522
+ cmd, # Pass command string
523
+ transport=transport,
524
+ port=port,
525
+ verbose=verbose,
526
+ reload=reload,
527
+ watch_paths=watch if watch else None,
528
+ )
529
+ )
530
+ return
531
+
532
+ first_param = params[0]
533
+ extra_args = params[1:] if len(params) > 1 else []
534
+
535
+ # Guard: strip accidental nested 'run' token from positional args,
536
+ # which can happen with nested invocations or reload wrappers.
537
+ if first_param == "run" and extra_args:
538
+ first_param, extra_args = extra_args[0], extra_args[1:]
539
+
540
+ # Try to interpret first_param as module[:attr] or file[:attr]
541
+ target = first_param
542
+ server_attr = "mcp"
543
+ if ":" in target:
544
+ target, server_attr = target.split(":", 1)
545
+
546
+ # Only allow dotted import paths or python files for Python mode
547
+ import importlib.util as _importlib_util
548
+
549
+ # Ensure current working directory is importable for local packages like 'controller'
550
+ try:
551
+ import sys as _sys
552
+ from pathlib import Path as _Path
553
+
554
+ cwd_str = str(_Path.cwd())
555
+ if cwd_str not in _sys.path:
556
+ _sys.path.insert(0, cwd_str)
557
+ except Exception: # noqa: S110
558
+ pass
559
+ try:
560
+ # If given a file path, detect and import via file spec
561
+ from pathlib import Path as _Path
562
+
563
+ if target.endswith(".py") and _Path(target).exists():
564
+ spec = _importlib_util.spec_from_file_location("_hud_module", target)
565
+ else:
566
+ spec = _importlib_util.find_spec(target)
567
+ except Exception:
568
+ spec = None
569
+
570
+ # Fallback: treat a local package directory (e.g. 'controller') as a module target
571
+ from pathlib import Path as _Path
572
+
573
+ pkg_dir = _Path(target)
574
+ is_pkg_dir = pkg_dir.is_dir() and (pkg_dir / "__init__.py").exists()
575
+
576
+ is_python_target = (spec is not None) or is_pkg_dir
577
+
578
+ if is_python_target and not (local or remote):
579
+ # Python file/package mode - use implicit MCP server
580
+ import asyncio
581
+
582
+ from .utils.package_runner import run_package_as_mcp, run_with_reload
583
+
584
+ if reload:
585
+ # Run with watchfiles reload
586
+ # Use user-provided watch paths or compute from module
587
+ if watch:
588
+ watch_paths = watch
589
+ else:
590
+ # Compute a watch path that works for dotted modules as well
591
+ watch_paths = [target]
592
+ if spec is not None:
593
+ origin = getattr(spec, "origin", None)
594
+ sublocs = getattr(spec, "submodule_search_locations", None)
595
+ if origin:
596
+ p = _Path(origin)
597
+ # If package __init__.py, watch the package directory
598
+ watch_paths = [str(p.parent if p.name == "__init__.py" else p)]
599
+ elif sublocs:
600
+ with contextlib.suppress(Exception):
601
+ watch_paths = [next(iter(sublocs))]
602
+
603
+ # Always run as subprocess when using reload to enable proper file watching
604
+ # This ensures the parent process can watch files while the child runs the server
605
+ run_with_reload(
606
+ None, # This forces subprocess mode for both stdio and http
607
+ watch_paths,
608
+ verbose=verbose,
609
+ )
610
+ else:
611
+ # Run normally (but still pass reload=False for consistency)
612
+ asyncio.run(
613
+ run_package_as_mcp(
614
+ target,
615
+ transport=transport,
616
+ port=port,
617
+ verbose=verbose,
618
+ server_attr=server_attr,
619
+ reload=False, # Explicitly pass reload state
620
+ watch_paths=None,
621
+ )
622
+ )
623
+ return
624
+
625
+ # Docker image mode
626
+ image = first_param
627
+ docker_args = extra_args
503
628
 
504
629
  # Handle conflicting flags
505
630
  if local and remote:
@@ -741,6 +866,12 @@ def remove(
741
866
  @app.command()
742
867
  def init(
743
868
  name: str = typer.Argument(None, help="Environment name (default: current directory name)"),
869
+ preset: str | None = typer.Option(
870
+ None,
871
+ "--preset",
872
+ "-p",
873
+ help="Preset to use: blank, deep-research, browser. If omitted, you'll choose interactively.", # noqa: E501
874
+ ),
744
875
  directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
745
876
  force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
746
877
  ) -> None:
@@ -757,7 +888,7 @@ def init(
757
888
  hud init my-env # Create in ./my-env/
758
889
  hud init my-env --dir /tmp # Create in /tmp/my-env/
759
890
  """
760
- create_environment(name, directory, force)
891
+ create_environment(name, directory, force, preset)
761
892
 
762
893
 
763
894
  @app.command()
@@ -774,7 +905,7 @@ def eval(
774
905
  source: str | None = typer.Argument(
775
906
  None,
776
907
  help=(
777
- "HuggingFace dataset identifier (e.g. 'hud-evals/SheetBench-50') or task JSON file. "
908
+ "HuggingFace dataset (e.g. 'hud-evals/SheetBench-50') or task JSON file. "
778
909
  "If not provided, looks for task.json in current directory."
779
910
  ),
780
911
  ),
@@ -846,54 +977,21 @@ def eval(
846
977
 
847
978
  hud_console = HUDConsole()
848
979
 
849
- # If no source provided, look for task/eval JSON files in current directory
980
+ # If no source provided, reuse RL helper to find a tasks file interactively
850
981
  if source is None:
851
- # Search for JSON files with "task" or "eval" in the name (case-insensitive)
852
- json_files = []
853
- patterns = [
854
- "*task*.json",
855
- "*eval*.json",
856
- "*Task*.json",
857
- "*Eval*.json",
858
- "*TASK*.json",
859
- "*EVAL*.json",
860
- ]
861
-
862
- # First check current directory
863
- for pattern in patterns:
864
- json_files.extend(Path(".").glob(pattern))
865
-
866
- # If no files found, search recursively (but limit depth to avoid deep searches)
867
- if not json_files:
868
- for pattern in patterns:
869
- # Search up to 2 levels deep
870
- json_files.extend(Path(".").glob(f"*/{pattern}"))
871
- json_files.extend(Path(".").glob(f"*/*/{pattern}"))
872
-
873
- # Remove duplicates and sort
874
- json_files = sorted(set(json_files))
875
-
876
- if not json_files:
982
+ try:
983
+ from hud.cli.utils.tasks import find_tasks_file
984
+
985
+ source = find_tasks_file(None, msg="Select a tasks file to run")
986
+ hud_console.success(f"Selected: {source}")
987
+ except Exception as e:
877
988
  hud_console.error(
878
989
  "No source provided and no task/eval JSON files found in current directory"
879
990
  )
880
991
  hud_console.info(
881
- "Usage: hud eval <source> or create a task JSON file "
882
- "(e.g., task.json, eval_config.json)"
883
- )
884
- raise typer.Exit(1)
885
- elif len(json_files) == 1:
886
- source = str(json_files[0])
887
- hud_console.info(f"Found task file: {source}")
888
- else:
889
- # Multiple files found, let user choose
890
- hud_console.info("Multiple task files found:")
891
- file_choice = hud_console.select(
892
- "Select a task file to run:",
893
- choices=[str(f) for f in json_files],
992
+ "Usage: hud eval <source> or create a task JSON file (e.g., task.json, tasks.jsonl)"
894
993
  )
895
- source = file_choice
896
- hud_console.success(f"Selected: {source}")
994
+ raise typer.Exit(1) from e
897
995
 
898
996
  # Import eval_command lazily to avoid importing agent dependencies
899
997
  try:
@@ -1085,6 +1183,42 @@ def rl(
1085
1183
  )
1086
1184
 
1087
1185
 
1186
+ @app.command()
1187
+ def set(
1188
+ assignments: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
1189
+ ..., help="One or more KEY=VALUE pairs to persist in ~/.hud/.env"
1190
+ ),
1191
+ ) -> None:
1192
+ """Persist API keys or other variables for HUD to use by default.
1193
+
1194
+ Examples:
1195
+ hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
1196
+
1197
+ Values are stored in ~/.hud/.env and are loaded by hud.settings with
1198
+ the lowest precedence (overridden by process env and project .env).
1199
+ """
1200
+ from hud.utils.hud_console import HUDConsole
1201
+
1202
+ hud_console = HUDConsole()
1203
+
1204
+ updates: dict[str, str] = {}
1205
+ for item in assignments:
1206
+ if "=" not in item:
1207
+ hud_console.error(f"Invalid assignment (expected KEY=VALUE): {item}")
1208
+ raise typer.Exit(1)
1209
+ key, value = item.split("=", 1)
1210
+ key = key.strip()
1211
+ value = value.strip()
1212
+ if not key:
1213
+ hud_console.error(f"Invalid key in assignment: {item}")
1214
+ raise typer.Exit(1)
1215
+ updates[key] = value
1216
+
1217
+ path = set_env_values(updates)
1218
+ hud_console.success("Saved credentials to user config")
1219
+ hud_console.info(f"Location: {path}")
1220
+
1221
+
1088
1222
  def main() -> None:
1089
1223
  """Main entry point for the CLI."""
1090
1224
  # Handle --version flag before Typer parses args
hud/cli/build.py CHANGED
@@ -236,10 +236,13 @@ def build_docker_image(
236
236
  hud_console.error(f"No Dockerfile found in {directory}")
237
237
  return False
238
238
 
239
+ # Default platform to match RL pipeline unless explicitly overridden
240
+ effective_platform = platform if platform is not None else "linux/amd64"
241
+
239
242
  # Build command
240
243
  cmd = ["docker", "build"]
241
- if platform:
242
- cmd.extend(["--platform", platform])
244
+ if effective_platform:
245
+ cmd.extend(["--platform", effective_platform])
243
246
  cmd.extend(["-t", tag])
244
247
  if no_cache:
245
248
  cmd.append("--no-cache")
@@ -437,8 +440,10 @@ def build_environment(
437
440
  version_tag = f"{base_name}:{new_version}"
438
441
 
439
442
  label_cmd = ["docker", "build"]
440
- if platform is not None:
441
- label_cmd.extend(["--platform", platform])
443
+ # Use same defaulting for the second build step
444
+ label_platform = platform if platform is not None else "linux/amd64"
445
+ if label_platform:
446
+ label_cmd.extend(["--platform", label_platform])
442
447
  label_cmd.extend(
443
448
  [
444
449
  "--label",
@@ -489,7 +494,7 @@ def build_environment(
489
494
  hud_console.warning("Could not retrieve image ID for lock file")
490
495
 
491
496
  # Remove temp image after we're done
492
- subprocess.run(["docker", "rmi", temp_tag], capture_output=True) # noqa: S603, S607
497
+ subprocess.run(["docker", "rmi", "-f", temp_tag], capture_output=True) # noqa: S603, S607
493
498
 
494
499
  # Add to local registry
495
500
  if image_id: