hud-python: hud_python-0.4.53-py3-none-any.whl → hud_python-0.4.55-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. More details are available on the original release page.

hud/cli/dev.py CHANGED
@@ -25,6 +25,7 @@ def show_dev_server_info(
25
25
  inspector: bool,
26
26
  interactive: bool,
27
27
  env_dir: Path | None = None,
28
+ new: bool = False,
28
29
  ) -> str:
29
30
  """Show consistent server info for both Python and Docker modes.
30
31
 
@@ -125,6 +126,7 @@ async def run_mcp_module(
125
126
  verbose: bool,
126
127
  inspector: bool,
127
128
  interactive: bool,
129
+ new: bool = False,
128
130
  ) -> None:
129
131
  """Run an MCP module directly."""
130
132
  # Check if this is a reload (not first run)
@@ -222,14 +224,53 @@ async def run_mcp_module(
222
224
 
223
225
  # Show server info only on first run
224
226
  if not is_reload:
225
- show_dev_server_info(
226
- server_name=mcp_server.name or "mcp-server",
227
- port=port,
228
- transport=transport,
229
- inspector=inspector,
230
- interactive=interactive,
231
- env_dir=Path.cwd().parent if (Path.cwd().parent / "environment").exists() else None,
232
- )
227
+ # Try dynamic trace first for HTTP mode (only if --new)
228
+ live_trace_url: str | None = None
229
+ if transport == "http" and new:
230
+ try:
231
+ local_mcp_config: dict[str, dict[str, Any]] = {
232
+ "hud": {
233
+ "url": f"http://localhost:{port}/mcp",
234
+ "headers": {},
235
+ }
236
+ }
237
+
238
+ from hud.cli.flows.dev import create_dynamic_trace
239
+
240
+ live_trace_url = await create_dynamic_trace(
241
+ mcp_config=local_mcp_config,
242
+ build_status=False,
243
+ environment_name=mcp_server.name or "mcp-server",
244
+ )
245
+ except Exception: # noqa: S110
246
+ pass
247
+
248
+ # Show UI using shared flow logic
249
+ if transport == "http" and live_trace_url and new:
250
+ # Minimal UI with live trace
251
+ from hud.cli.flows.dev import generate_cursor_deeplink, show_dev_ui
252
+
253
+ server_name = mcp_server.name or "mcp-server"
254
+ cursor_deeplink = generate_cursor_deeplink(server_name, port)
255
+
256
+ show_dev_ui(
257
+ live_trace_url=live_trace_url,
258
+ server_name=server_name,
259
+ port=port,
260
+ cursor_deeplink=cursor_deeplink,
261
+ is_docker=False,
262
+ )
263
+ else:
264
+ # Full UI for HTTP without trace, or stdio mode
265
+ show_dev_server_info(
266
+ server_name=mcp_server.name or "mcp-server",
267
+ port=port,
268
+ transport=transport,
269
+ inspector=inspector,
270
+ interactive=interactive,
271
+ env_dir=Path.cwd().parent if (Path.cwd().parent / "environment").exists() else None,
272
+ new=new,
273
+ )
233
274
 
234
275
  # Check if there's an environment backend and remind user to start it (first run only)
235
276
  if not is_reload:
@@ -238,9 +279,10 @@ async def run_mcp_module(
238
279
  if env_dir.exists() and (env_dir / "server.py").exists():
239
280
  hud_console.info("")
240
281
  hud_console.info(
241
- f"{hud_console.sym.FLOW} Don't forget to start the environment backend:"
282
+ f"{hud_console.sym.FLOW} Don't forget to start the environment backend in another "
283
+ "terminal:"
242
284
  )
243
- hud_console.info(" cd ../environment && uvicorn server:app --reload")
285
+ hud_console.info(" cd environment && uv run python uvicorn server:app --reload")
244
286
 
245
287
  # Launch inspector if requested (first run only)
246
288
  if inspector and transport == "http":
@@ -347,6 +389,7 @@ def run_with_reload(
347
389
  verbose: bool,
348
390
  inspector: bool,
349
391
  interactive: bool,
392
+ new: bool = False,
350
393
  ) -> None:
351
394
  """Run module with file watching and auto-reload."""
352
395
  try:
@@ -389,6 +432,11 @@ def run_with_reload(
389
432
 
390
433
  if verbose:
391
434
  cmd.append("--verbose")
435
+
436
+ if new:
437
+ cmd.append("--new")
438
+
439
+ if verbose:
392
440
  hud_console.info(f"Starting: {' '.join(cmd)}")
393
441
 
394
442
  # Mark as reload after first run to suppress logs
@@ -454,7 +502,12 @@ def run_with_reload(
454
502
 
455
503
 
456
504
  def run_docker_dev_server(
457
- port: int, verbose: bool, inspector: bool, interactive: bool, docker_args: list[str]
505
+ port: int,
506
+ verbose: bool,
507
+ inspector: bool,
508
+ interactive: bool,
509
+ docker_args: list[str],
510
+ new: bool = False,
458
511
  ) -> None:
459
512
  """Run MCP server in Docker with volume mounts, expose via local HTTP proxy."""
460
513
  import typer
@@ -462,6 +515,11 @@ def run_docker_dev_server(
462
515
 
463
516
  from hud.server import MCPServer
464
517
 
518
+ # Ensure Docker CLI and daemon are available before proceeding
519
+ from .utils.docker import require_docker_running
520
+
521
+ require_docker_running()
522
+
465
523
  cwd = Path.cwd()
466
524
 
467
525
  # Find environment directory (current or parent with hud.lock.yaml)
@@ -528,15 +586,6 @@ def run_docker_dev_server(
528
586
  env_dir=env_dir,
529
587
  )
530
588
 
531
- # Env flags already injected by create_docker_run_command
532
-
533
- # Print startup info
534
- hud_console.header("HUD Development Mode (Docker)")
535
-
536
- if verbose:
537
- hud_console.section_title("Docker Command")
538
- hud_console.info(" ".join(docker_cmd))
539
-
540
589
  # Create MCP config pointing to the Docker container's stdio
541
590
  mcp_config = {
542
591
  "docker": {
@@ -545,15 +594,62 @@ def run_docker_dev_server(
545
594
  }
546
595
  }
547
596
 
548
- # Show consistent server info
549
- show_dev_server_info(
550
- server_name=image_name,
551
- port=port,
552
- transport="http", # Docker mode always uses HTTP proxy
553
- inspector=inspector,
554
- interactive=interactive,
555
- env_dir=env_dir,
556
- )
597
+ # Attempt to create dynamic trace early (before any UI)
598
+ import asyncio as _asy
599
+
600
+ from hud.cli.flows.dev import create_dynamic_trace, generate_cursor_deeplink, show_dev_ui
601
+
602
+ live_trace_url: str | None = None
603
+ if new:
604
+ try:
605
+ local_mcp_config: dict[str, dict[str, Any]] = {
606
+ "hud": {
607
+ "url": f"http://localhost:{port}/mcp",
608
+ "headers": {},
609
+ }
610
+ }
611
+ live_trace_url = _asy.run(
612
+ create_dynamic_trace(
613
+ mcp_config=local_mcp_config,
614
+ build_status=True,
615
+ environment_name=image_name,
616
+ )
617
+ )
618
+ except Exception: # noqa: S110
619
+ pass
620
+
621
+ # Show appropriate UI
622
+ if live_trace_url and new:
623
+ # Minimal UI with live trace
624
+ cursor_deeplink = generate_cursor_deeplink(image_name, port)
625
+ show_dev_ui(
626
+ live_trace_url=live_trace_url,
627
+ server_name=image_name,
628
+ port=port,
629
+ cursor_deeplink=cursor_deeplink,
630
+ is_docker=True,
631
+ )
632
+ else:
633
+ # Full UI
634
+ hud_console.header("HUD Development Mode (Docker)")
635
+ if verbose:
636
+ hud_console.section_title("Docker Command")
637
+ hud_console.info(" ".join(docker_cmd))
638
+ show_dev_server_info(
639
+ server_name=image_name,
640
+ port=port,
641
+ transport="http",
642
+ inspector=inspector,
643
+ interactive=interactive,
644
+ env_dir=env_dir,
645
+ new=new,
646
+ )
647
+ hud_console.dim_info(
648
+ "",
649
+ "Container restarts on file changes (mounted volumes), "
650
+ "if changing tools run hud dev again",
651
+ )
652
+ hud_console.info("")
557
653
 
558
654
  # Suppress logs unless verbose
559
655
  if not verbose:
@@ -562,13 +658,6 @@ def run_docker_dev_server(
562
658
  logging.getLogger("uvicorn").setLevel(logging.ERROR)
563
659
  os.environ["FASTMCP_DISABLE_BANNER"] = "1"
564
660
 
565
- # Note about hot-reload behavior
566
- hud_console.dim_info(
567
- "",
568
- "Container restarts on file changes (mounted volumes), if changing tools run hud dev again",
569
- )
570
- hud_console.info("")
571
-
572
661
  # Create and run proxy with HUD helpers
573
662
  async def run_proxy() -> None:
574
663
  from fastmcp import FastMCP
@@ -617,6 +706,7 @@ def run_mcp_dev_server(
617
706
  watch: list[str] | None,
618
707
  docker: bool = False,
619
708
  docker_args: list[str] | None = None,
709
+ new: bool = False,
620
710
  ) -> None:
621
711
  """Run MCP development server with hot-reload."""
622
712
  docker_args = docker_args or []
@@ -627,12 +717,12 @@ def run_mcp_dev_server(
627
717
  hud_console.note("Detected Dockerfile - using Docker mode with volume mounts")
628
718
  hud_console.dim_info("Tip", "Use 'hud dev --help' to see all options")
629
719
  hud_console.info("")
630
- run_docker_dev_server(port, verbose, inspector, interactive, docker_args)
720
+ run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new)
631
721
  return
632
722
 
633
723
  # Route to Docker mode if explicitly requested
634
724
  if docker:
635
- run_docker_dev_server(port, verbose, inspector, interactive, docker_args)
725
+ run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new)
636
726
  return
637
727
 
638
728
  transport = "stdio" if stdio else "http"
@@ -676,6 +766,6 @@ def run_mcp_dev_server(
676
766
  is_child = os.environ.get("_HUD_DEV_CHILD") == "1"
677
767
 
678
768
  if is_child:
679
- asyncio.run(run_mcp_module(module, transport, port, verbose, False, False))
769
+ asyncio.run(run_mcp_module(module, transport, port, verbose, False, False, new))
680
770
  else:
681
- run_with_reload(module, watch_paths, transport, port, verbose, inspector, interactive)
771
+ run_with_reload(module, watch_paths, transport, port, verbose, inspector, interactive, new)
hud/cli/eval.py CHANGED
@@ -5,13 +5,14 @@ from __future__ import annotations
5
5
  import asyncio
6
6
  import logging
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Literal
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  import typer
11
11
 
12
12
  import hud
13
13
  from hud.cli.utils.env_check import ensure_built, find_environment_dir
14
14
  from hud.settings import settings
15
+ from hud.types import AgentType
15
16
  from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
16
17
  from hud.utils.hud_console import HUDConsole
17
18
 
@@ -21,6 +22,28 @@ logger = logging.getLogger(__name__)
21
22
  hud_console = HUDConsole()
22
23
 
23
24
 
25
+ def _tasks_use_local_mcp(tasks: list[Task]) -> bool:
26
+ """Return True if any task's MCP config uses a local command instead of a URL.
27
+
28
+ A config is considered local when a server entry contains a 'command' key and
29
+ does not provide a 'url'.
30
+ """
31
+ try:
32
+ for t in tasks:
33
+ cfg = getattr(t, "mcp_config", {}) or {}
34
+ if not isinstance(cfg, dict):
35
+ continue
36
+ for server_cfg in cfg.values():
37
+ if isinstance(server_cfg, dict) and (
38
+ "command" in server_cfg and not server_cfg.get("url")
39
+ ):
40
+ return True
41
+ return False
42
+ except Exception:
43
+ # Be conservative: if detection fails, do not block
44
+ return False
45
+
46
+
24
47
  def get_available_models() -> list[dict[str, str | None]]:
25
48
  """Fetch available models from the HUD API (only ready models).
26
49
 
@@ -113,7 +136,7 @@ def _build_vllm_config(
113
136
 
114
137
 
115
138
  def build_agent(
116
- agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
139
+ agent_type: AgentType,
117
140
  *,
118
141
  model: str | None = None,
119
142
  allowed_tools: list[str] | None = None,
@@ -123,11 +146,11 @@ def build_agent(
123
146
  """Create and return the requested agent type."""
124
147
 
125
148
  # Import agents lazily to avoid dependency issues
126
- if agent_type == "integration_test":
149
+ if agent_type == AgentType.INTEGRATION_TEST:
127
150
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
128
151
 
129
152
  return IntegrationTestRunner(verbose=verbose)
130
- elif agent_type == "vllm":
153
+ elif agent_type == AgentType.VLLM:
131
154
  # Create a generic OpenAI agent for vLLM server
132
155
  try:
133
156
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
@@ -147,7 +170,7 @@ def build_agent(
147
170
  )
148
171
  return GenericOpenAIChatAgent(**config)
149
172
 
150
- elif agent_type == "openai":
173
+ elif agent_type == AgentType.OPENAI:
151
174
  try:
152
175
  from hud.agents import OperatorAgent
153
176
  except ImportError as e:
@@ -165,7 +188,7 @@ def build_agent(
165
188
  else:
166
189
  return OperatorAgent(verbose=verbose)
167
190
 
168
- elif agent_type == "litellm":
191
+ elif agent_type == AgentType.LITELLM:
169
192
  try:
170
193
  from hud.agents.lite_llm import LiteAgent
171
194
  except ImportError as e:
@@ -209,7 +232,7 @@ def build_agent(
209
232
  async def run_single_task(
210
233
  source: str,
211
234
  *,
212
- agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
235
+ agent_type: AgentType = AgentType.CLAUDE,
213
236
  model: str | None = None,
214
237
  allowed_tools: list[str] | None = None,
215
238
  max_steps: int = 10,
@@ -264,18 +287,44 @@ async def run_single_task(
264
287
  "Using first task from dataset (run with --full to run the entire dataset)..."
265
288
  )
266
289
 
267
- task_prompt = task.prompt[:50] + "..." if len(task.prompt) > 50 else task.prompt
290
+ # Warn/confirm if the task uses local MCP config
291
+ try:
292
+ if group_size > 1 and _tasks_use_local_mcp([task]):
293
+ hud_console.warning(
294
+ "Detected a local MCP configuration (uses 'command' instead of a 'url')."
295
+ )
296
+ hud_console.info(
297
+ "Ensure there are no exposed port conflicts during Docker runs/builds in eval."
298
+ )
299
+ proceed = hud_console.confirm(
300
+ "Proceed with running local MCP servers for this evaluation?",
301
+ default=True,
302
+ )
303
+ if not proceed:
304
+ # Provide a helpful next step
305
+ hud_console.hint("You can convert tasks to remote with: hud convert <tasks_file>")
306
+ raise typer.Exit(1)
307
+ # Always show the convert hint for awareness
308
+ hud_console.hint(
309
+ "Avoid local port conflicts by converting to remote: hud convert <tasks_file>"
310
+ )
311
+ except typer.Exit:
312
+ raise
313
+ except Exception as e:
314
+ hud_console.debug(f"Local MCP confirmation skipped due to error: {e}")
315
+
316
+ task_prompt = task.prompt
268
317
 
269
318
  # Use grouped evaluation if group_size > 1
270
319
  agent_config: dict[str, Any] = {}
271
- if agent_type == "integration_test":
320
+ if agent_type == AgentType.INTEGRATION_TEST:
272
321
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
273
322
 
274
323
  agent_class = IntegrationTestRunner
275
324
  agent_config = {"verbose": verbose}
276
325
  if allowed_tools:
277
326
  agent_config["allowed_tools"] = allowed_tools
278
- elif agent_type == "vllm":
327
+ elif agent_type == AgentType.VLLM:
279
328
  # Special handling for vLLM
280
329
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
281
330
 
@@ -288,14 +337,14 @@ async def run_single_task(
288
337
  allowed_tools=allowed_tools,
289
338
  verbose=verbose,
290
339
  )
291
- elif agent_type == "openai":
340
+ elif agent_type == AgentType.OPENAI:
292
341
  from hud.agents import OperatorAgent
293
342
 
294
343
  agent_class = OperatorAgent
295
344
  agent_config = {"verbose": verbose}
296
345
  if allowed_tools:
297
346
  agent_config["allowed_tools"] = allowed_tools
298
- elif agent_type == "litellm":
347
+ elif agent_type == AgentType.LITELLM:
299
348
  from hud.agents.lite_llm import LiteAgent
300
349
 
301
350
  agent_class = LiteAgent
@@ -305,7 +354,7 @@ async def run_single_task(
305
354
  }
306
355
  if allowed_tools:
307
356
  agent_config["allowed_tools"] = allowed_tools
308
- elif agent_type == "claude":
357
+ elif agent_type == AgentType.CLAUDE:
309
358
  from hud.agents import ClaudeAgent
310
359
 
311
360
  agent_class = ClaudeAgent
@@ -353,7 +402,7 @@ async def run_single_task(
353
402
  async def run_full_dataset(
354
403
  source: str,
355
404
  *,
356
- agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
405
+ agent_type: AgentType = AgentType.CLAUDE,
357
406
  model: str | None = None,
358
407
  allowed_tools: list[str] | None = None,
359
408
  max_concurrent: int = 30,
@@ -386,6 +435,56 @@ async def run_full_dataset(
386
435
  hud_console.error(f"No tasks found in: {source}")
387
436
  raise typer.Exit(1)
388
437
 
438
+ # Warn/confirm once if any task uses local MCP config
439
+ try:
440
+ if _tasks_use_local_mcp(tasks):
441
+ hud_console.warning(
442
+ "Detected local MCP configurations (use 'command' instead of a 'url')."
443
+ )
444
+ hud_console.info(
445
+ "When running many tasks concurrently, exposed host ports from Docker may conflict."
446
+ )
447
+ proceed = hud_console.confirm(
448
+ "Proceed with running local MCP servers for this evaluation?",
449
+ default=True,
450
+ )
451
+ if not proceed:
452
+ # Helpful hint when source is a file path
453
+ try:
454
+ path = Path(source)
455
+ if path.exists():
456
+ hud_console.hint(
457
+ f"You can convert tasks to remote with: hud convert {path.name}"
458
+ )
459
+ else:
460
+ hud_console.hint(
461
+ "You can convert tasks to remote with: hud convert <tasks_file>"
462
+ )
463
+ except Exception:
464
+ hud_console.hint(
465
+ "You can convert tasks to remote with: hud convert <tasks_file>"
466
+ )
467
+ raise typer.Exit(1)
468
+ # Always show the convert hint for awareness
469
+ try:
470
+ path = Path(source)
471
+ if path.exists():
472
+ hud_console.hint(
473
+ f"Convert to remote to avoid port conflicts: hud convert {path.name}"
474
+ )
475
+ else:
476
+ hud_console.hint(
477
+ "Convert to remote to avoid port conflicts: hud convert <tasks_file>"
478
+ )
479
+ except Exception:
480
+ hud_console.hint(
481
+ "Convert to remote to avoid port conflicts: hud convert <tasks_file>"
482
+ )
483
+ except typer.Exit:
484
+ raise
485
+ except Exception as e:
486
+ hud_console.debug(f"Local MCP confirmation skipped due to error: {e}")
487
+
389
488
  # Convert Task objects to dicts for dataset runners
390
489
  dataset_or_tasks = [task.model_dump() for task in tasks]
391
490
 
@@ -395,12 +494,12 @@ async def run_full_dataset(
395
494
 
396
495
  # Build agent class + config for run_dataset
397
496
  agent_config: dict[str, Any]
398
- if agent_type == "integration_test": # --integration-test mode
497
+ if agent_type == AgentType.INTEGRATION_TEST: # --integration-test mode
399
498
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
400
499
 
401
500
  agent_class = IntegrationTestRunner
402
501
  agent_config = {"verbose": verbose}
403
- elif agent_type == "vllm":
502
+ elif agent_type == AgentType.VLLM:
404
503
  try:
405
504
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
406
505
 
@@ -419,7 +518,7 @@ async def run_full_dataset(
419
518
  allowed_tools=allowed_tools,
420
519
  verbose=verbose,
421
520
  )
422
- elif agent_type == "openai":
521
+ elif agent_type == AgentType.OPENAI:
423
522
  try:
424
523
  from hud.agents import OperatorAgent
425
524
 
@@ -435,7 +534,7 @@ async def run_full_dataset(
435
534
  if allowed_tools:
436
535
  agent_config["allowed_tools"] = allowed_tools
437
536
 
438
- elif agent_type == "litellm":
537
+ elif agent_type == AgentType.LITELLM:
439
538
  try:
440
539
  from hud.agents.lite_llm import LiteAgent
441
540
 
@@ -539,8 +638,8 @@ def eval_command(
539
638
  "--full",
540
639
  help="Run the entire dataset (omit for single-task debug mode)",
541
640
  ),
542
- agent: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = typer.Option(
543
- "claude",
641
+ agent: AgentType = typer.Option( # noqa: B008
642
+ AgentType.CLAUDE,
544
643
  "--agent",
545
644
  help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
546
645
  ),
@@ -648,21 +747,21 @@ def eval_command(
648
747
 
649
748
  # We pass integration_test as the agent_type
650
749
  if integration_test:
651
- agent = "integration_test"
750
+ agent = AgentType.INTEGRATION_TEST
652
751
 
653
752
  # Check for required API keys
654
- if agent == "claude":
753
+ if agent == AgentType.CLAUDE:
655
754
  if not settings.anthropic_api_key:
656
755
  hud_console.error("ANTHROPIC_API_KEY is required for Claude agent")
657
756
  hud_console.info(
658
757
  "Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here"
659
758
  )
660
759
  raise typer.Exit(1)
661
- elif agent == "openai" and not settings.openai_api_key:
760
+ elif agent == AgentType.OPENAI and not settings.openai_api_key:
662
761
  hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
663
762
  hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here")
664
763
  raise typer.Exit(1)
665
- elif agent == "vllm":
764
+ elif agent == AgentType.VLLM:
666
765
  if model:
667
766
  hud_console.info(f"Using vLLM with model: {model}")
668
767
  else: