hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic.

Files changed (88)
  1. hud/__init__.py +13 -1
  2. hud/agents/base.py +14 -3
  3. hud/agents/lite_llm.py +1 -1
  4. hud/agents/openai_chat_generic.py +15 -3
  5. hud/agents/tests/test_base.py +9 -2
  6. hud/agents/tests/test_base_runtime.py +164 -0
  7. hud/cli/__init__.py +18 -25
  8. hud/cli/build.py +35 -27
  9. hud/cli/dev.py +11 -29
  10. hud/cli/eval.py +114 -145
  11. hud/cli/tests/test_analyze_module.py +120 -0
  12. hud/cli/tests/test_build.py +26 -3
  13. hud/cli/tests/test_build_failure.py +41 -0
  14. hud/cli/tests/test_build_module.py +50 -0
  15. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  16. hud/cli/tests/test_cli_root.py +134 -0
  17. hud/cli/tests/test_eval.py +4 -0
  18. hud/cli/tests/test_mcp_server.py +8 -7
  19. hud/cli/tests/test_push_happy.py +74 -0
  20. hud/cli/tests/test_push_wrapper.py +23 -0
  21. hud/cli/utils/docker.py +120 -1
  22. hud/cli/utils/runner.py +1 -1
  23. hud/cli/utils/tasks.py +4 -1
  24. hud/cli/utils/tests/__init__.py +0 -0
  25. hud/cli/utils/tests/test_config.py +58 -0
  26. hud/cli/utils/tests/test_docker.py +93 -0
  27. hud/cli/utils/tests/test_docker_hints.py +71 -0
  28. hud/cli/utils/tests/test_env_check.py +74 -0
  29. hud/cli/utils/tests/test_environment.py +42 -0
  30. hud/cli/utils/tests/test_interactive_module.py +60 -0
  31. hud/cli/utils/tests/test_local_runner.py +50 -0
  32. hud/cli/utils/tests/test_logging_utils.py +23 -0
  33. hud/cli/utils/tests/test_metadata.py +49 -0
  34. hud/cli/utils/tests/test_package_runner.py +35 -0
  35. hud/cli/utils/tests/test_registry_utils.py +49 -0
  36. hud/cli/utils/tests/test_remote_runner.py +25 -0
  37. hud/cli/utils/tests/test_runner_modules.py +52 -0
  38. hud/cli/utils/tests/test_source_hash.py +36 -0
  39. hud/cli/utils/tests/test_tasks.py +80 -0
  40. hud/cli/utils/version_check.py +257 -0
  41. hud/clients/base.py +1 -1
  42. hud/clients/mcp_use.py +3 -1
  43. hud/datasets/parallel.py +2 -2
  44. hud/datasets/runner.py +85 -24
  45. hud/datasets/tests/__init__.py +0 -0
  46. hud/datasets/tests/test_runner.py +106 -0
  47. hud/datasets/tests/test_utils.py +228 -0
  48. hud/otel/config.py +8 -6
  49. hud/otel/context.py +4 -4
  50. hud/otel/exporters.py +231 -57
  51. hud/otel/tests/__init__.py +0 -1
  52. hud/otel/tests/test_instrumentation.py +207 -0
  53. hud/rl/learner.py +1 -1
  54. hud/server/tests/test_server_extra.py +2 -0
  55. hud/shared/exceptions.py +35 -9
  56. hud/shared/hints.py +25 -0
  57. hud/shared/requests.py +15 -3
  58. hud/shared/tests/test_exceptions.py +39 -30
  59. hud/shared/tests/test_hints.py +167 -0
  60. hud/telemetry/__init__.py +30 -6
  61. hud/telemetry/async_context.py +331 -0
  62. hud/telemetry/job.py +51 -12
  63. hud/telemetry/tests/test_async_context.py +242 -0
  64. hud/telemetry/tests/test_instrument.py +414 -0
  65. hud/telemetry/tests/test_job.py +609 -0
  66. hud/telemetry/tests/test_trace.py +184 -6
  67. hud/telemetry/trace.py +16 -17
  68. hud/tools/computer/qwen.py +4 -1
  69. hud/tools/computer/settings.py +2 -2
  70. hud/tools/executors/base.py +4 -2
  71. hud/tools/tests/test_submit.py +85 -0
  72. hud/tools/tests/test_types.py +193 -0
  73. hud/types.py +7 -1
  74. hud/utils/agent_factories.py +1 -3
  75. hud/utils/mcp.py +1 -1
  76. hud/utils/task_tracking.py +223 -0
  77. hud/utils/tests/test_agent_factories.py +60 -0
  78. hud/utils/tests/test_mcp.py +4 -6
  79. hud/utils/tests/test_pretty_errors.py +186 -0
  80. hud/utils/tests/test_tasks.py +187 -0
  81. hud/utils/tests/test_tool_shorthand.py +154 -0
  82. hud/utils/tests/test_version.py +1 -1
  83. hud/version.py +1 -1
  84. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
  85. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
  86. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
  87. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
  88. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/cli/eval.py CHANGED
@@ -68,6 +68,50 @@ def get_available_models() -> list[dict[str, str | None]]:
     return []
 
 
+def _build_vllm_config(
+    vllm_base_url: str | None,
+    model: str | None,
+    allowed_tools: list[str] | None,
+    verbose: bool,
+) -> dict[str, Any]:
+    """Build configuration for vLLM agent.
+
+    Args:
+        vllm_base_url: Optional base URL for vLLM server
+        model: Model name to use
+        allowed_tools: Optional list of allowed tools
+        verbose: Enable verbose output
+
+    Returns:
+        Dictionary with agent configuration
+    """
+    # Determine base URL and API key
+    if vllm_base_url is not None:
+        base_url = vllm_base_url
+        api_key = settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
+        hud_console.info(f"Using vLLM server at {base_url}")
+    else:
+        base_url = "http://localhost:8000/v1"
+        api_key = "token-abc123"
+
+    config: dict[str, Any] = {
+        "api_key": api_key,
+        "base_url": base_url,
+        "model_name": model or "served-model",
+        "verbose": verbose,
+        "completion_kwargs": {
+            "temperature": 0.7,
+            "max_tokens": 2048,
+            "tool_choice": "auto",
+        },
+    }
+
+    if allowed_tools:
+        config["allowed_tools"] = allowed_tools
+
+    return config
+
+
 def build_agent(
     agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
     *,
@@ -86,8 +130,6 @@ def build_agent(
     elif agent_type == "vllm":
         # Create a generic OpenAI agent for vLLM server
         try:
-            from openai import AsyncOpenAI
-
             from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
         except ImportError as e:
             hud_console.error(
@@ -96,36 +138,14 @@ def build_agent(
             )
             raise typer.Exit(1) from e
 
-        # Determine the base URL to use
-        if vllm_base_url is not None:
-            # Use the provided vLLM URL (for custom/local servers)
-            base_url = vllm_base_url
-            hud_console.info(f"Using vLLM server at {base_url}")
-            api_key = (
-                settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
-            )
-        else:
-            # Default to localhost
-            base_url = "http://localhost:8000/v1"
-            api_key = "token-abc123"
-
-        # Create OpenAI client for vLLM
-        openai_client = AsyncOpenAI(
-            base_url=base_url,
-            api_key=api_key,
-            timeout=30.0,
-        )
-
-        return GenericOpenAIChatAgent(
-            openai_client=openai_client,
-            model_name=model or "served-model",  # Default model name
+        # Use the shared config builder
+        config = _build_vllm_config(
+            vllm_base_url=vllm_base_url,
+            model=model,
+            allowed_tools=allowed_tools,
             verbose=verbose,
-            completion_kwargs={
-                "temperature": 0.7,
-                "max_tokens": 2048,
-                "tool_choice": "required",  # if self.actor_config.force_tool_choice else "auto",
-            },
         )
+        return GenericOpenAIChatAgent(**config)
 
     elif agent_type == "openai":
         try:
@@ -257,25 +277,17 @@ async def run_single_task(
             agent_config["allowed_tools"] = allowed_tools
     elif agent_type == "vllm":
         # Special handling for vLLM
-        sample_agent = build_agent(
-            agent_type,
+        from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
+
+        agent_class = GenericOpenAIChatAgent
+
+        # Use the shared config builder
+        agent_config = _build_vllm_config(
+            vllm_base_url=vllm_base_url,
             model=model,
             allowed_tools=allowed_tools,
             verbose=verbose,
-            vllm_base_url=vllm_base_url,
         )
-        agent_config = {
-            "openai_client": sample_agent.oai,
-            "model_name": sample_agent.model_name,
-            "verbose": verbose,
-            "completion_kwargs": sample_agent.completion_kwargs,
-        }
-        if allowed_tools:
-            agent_config["allowed_tools"] = allowed_tools
-
-        from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
-
-        agent_class = GenericOpenAIChatAgent
     elif agent_type == "openai":
         from hud.agents import OperatorAgent
 
@@ -300,6 +312,7 @@ async def run_single_task(
         agent_config = {
             "model": model or "claude-sonnet-4-20250514",
             "verbose": verbose,
+            "validate_api_key": False,
         }
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
@@ -345,24 +358,18 @@ async def run_full_dataset(
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 30,
     max_steps: int = 10,
-    parallel: bool = False,
-    max_workers: int | None = None,
-    max_concurrent_per_worker: int = 25,
     verbose: bool = False,
     vllm_base_url: str | None = None,
     group_size: int = 1,
 ) -> list[Any]:
-    """Run evaluation across the entire dataset.
-
-    Uses either asyncio-based run_dataset or process-based parallel execution
-    depending on the parallel flag."""
+    """Run evaluation across the entire dataset using asyncio-based concurrency."""
 
     # Provide early feedback to user
     hud_console.info("🔧 Initializing evaluation...")
 
     # Import run_dataset lazily
     try:
-        from hud.datasets import run_dataset, run_dataset_parallel, run_dataset_parallel_manual
+        from hud.datasets import run_dataset
         from hud.utils.tasks import load_tasks
     except ImportError as e:
         hud_console.error(
@@ -387,6 +394,7 @@ async def run_full_dataset(
     dataset_name = f"Dataset: {path.name}" if path.exists() else source.split("/")[-1]
 
     # Build agent class + config for run_dataset
+    agent_config: dict[str, Any]
    if agent_type == "integration_test":  # --integration-test mode
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
 
@@ -404,24 +412,13 @@ async def run_full_dataset(
             )
             raise typer.Exit(1) from e
 
-        # Use build_agent to create a sample agent to get the config
-        sample_agent = build_agent(
-            agent_type,
+        # Use the shared config builder
+        agent_config = _build_vllm_config(
+            vllm_base_url=vllm_base_url,
             model=model,
             allowed_tools=allowed_tools,
             verbose=verbose,
-            vllm_base_url=vllm_base_url,
         )
-
-        # Extract the config from the sample agent
-        agent_config: dict[str, Any] = {
-            "openai_client": sample_agent.oai,
-            "model_name": sample_agent.model_name,
-            "verbose": verbose,
-            "completion_kwargs": sample_agent.completion_kwargs,
-        }
-        if allowed_tools:
-            agent_config["allowed_tools"] = allowed_tools
     elif agent_type == "openai":
         try:
             from hud.agents import OperatorAgent
@@ -434,7 +431,7 @@ async def run_full_dataset(
             )
             raise typer.Exit(1) from e
 
-        agent_config = {"verbose": verbose}
+        agent_config = {"verbose": verbose, "validate_api_key": False}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
 
@@ -472,6 +469,7 @@ async def run_full_dataset(
         agent_config = {
             "model": model or "claude-sonnet-4-20250514",
             "verbose": verbose,
+            "validate_api_key": False,
         }
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
@@ -505,9 +503,7 @@ async def run_full_dataset(
             agent_class=agent_class,
             agent_config=agent_config,
             group_size=group_size,
-            max_parallel_episodes=max_concurrent
-            if not parallel
-            else max_concurrent_per_worker * (max_workers or 4),
+            max_parallel_episodes=max_concurrent,
             max_steps=max_steps,
             verbose=verbose,
             job_id=job.id,
@@ -519,48 +515,18 @@ async def run_full_dataset(
         # Return stats for consistency with other modes
         return stats
 
-    # Original logic for non-grouped evaluation
-    elif parallel:
-        hud_console.info(
-            f"🚀 Running PARALLEL evaluation (workers: {max_workers or 'auto'}, max_concurrent: {max_concurrent})…"  # noqa: E501
-        )
-        if max_workers is None:
-            # Use auto-optimization (now the default run_dataset_parallel)
-            return await run_dataset_parallel(
-                name=f"Evaluation {dataset_name}",
-                dataset=dataset_or_tasks,
-                agent_class=agent_class,
-                agent_config=agent_config,
-                max_concurrent=max_concurrent,
-                metadata={"dataset": source, "parallel": True},
-                max_steps=max_steps,
-                auto_respond=True,
-            )
-        else:
-            # Use manual configuration
-            return await run_dataset_parallel_manual(
-                name=f"Evaluation {dataset_name}",
-                dataset=dataset_or_tasks,
-                agent_class=agent_class,
-                agent_config=agent_config,
-                max_workers=max_workers,
-                max_concurrent_per_worker=max_concurrent_per_worker,
-                max_concurrent=max_concurrent,
-                metadata={"dataset": source, "parallel": True},
-                max_steps=max_steps,
-                auto_respond=True,
-            )
-    else:
-        hud_console.info(f"🚀 Running evaluation (max_concurrent: {max_concurrent})…")
-        return await run_dataset(
-            name=f"Evaluation {dataset_name}",
-            dataset=dataset_or_tasks,
-            agent_class=agent_class,
-            agent_config=agent_config,
-            max_concurrent=max_concurrent,
-            metadata={"dataset": source},
-            max_steps=max_steps,
-        )
+    # Run evaluation with asyncio-based concurrency
+    hud_console.info(f"🚀 Running evaluation (max_concurrent: {max_concurrent})…")
+    return await run_dataset(
+        name=f"Evaluation {dataset_name}",
+        dataset=dataset_or_tasks,
+        agent_class=agent_class,
+        agent_config=agent_config,
+        max_concurrent=max_concurrent,
+        metadata={"dataset": source},
+        max_steps=max_steps,
+        auto_respond=True,
+    )
 
 
 def eval_command(
@@ -591,31 +557,20 @@ def eval_command(
     max_concurrent: int = typer.Option(
         30,
         "--max-concurrent",
-        help="Concurrency level for asyncio mode (ignored in parallel mode)",
+        help=(
+            "Maximum concurrent tasks (1-200 recommended, prevents rate limits "
+            "and resource exhaustion)"
+        ),
     ),
     max_steps: int | None = typer.Option(
         None,
         "--max-steps",
         help="Maximum steps per task (default: 10 for single, 50 for full)",
     ),
-    parallel: bool = typer.Option(
-        False,
-        "--parallel",
-        help="Use process-based parallel execution for large datasets (100+ tasks)",
-    ),
-    max_workers: int | None = typer.Option(
-        None,
-        "--max-workers",
-        help="Number of worker processes for parallel mode (auto-optimized if not set)",
-    ),
-    max_concurrent_per_worker: int = typer.Option(
-        20,
-        "--max-concurrent-per-worker",
-        help="Maximum concurrent tasks per worker in parallel mode",
-    ),
     verbose: bool = typer.Option(
         False,
         "--verbose",
+        "-v",
         help="Enable verbose output from the agent",
     ),
     very_verbose: bool = typer.Option(
@@ -650,23 +605,20 @@ def eval_command(
     # Evaluate a single task from SheetBench
     hud eval hud-evals/SheetBench-50
 
-    # Evaluate the FULL SheetBench dataset with Claude (asyncio mode)
+    # Evaluate the FULL SheetBench dataset with Claude
     hud eval hud-evals/SheetBench-50 --full --agent claude
 
-    # Run large dataset with PARALLEL execution (auto-optimized)
-    hud eval hud-evals/OSWorld-Verified-Gold --full --parallel
+    # Run with higher concurrency for faster evaluation
+    hud eval hud-evals/OSWorld-Verified-Gold --full --max-concurrent 100
 
-    # Parallel mode with manual configuration (16 workers, 25 tasks each)
-    hud eval hud-evals/OSWorld-Verified-Gold --full --parallel --max-workers 16
-
-    # Limit total concurrent tasks to prevent rate limits
-    hud eval hud-evals/SheetBench-50 --full --parallel --max-concurrent 20
+    # Limit concurrent tasks to prevent rate limits
+    hud eval hud-evals/SheetBench-50 --full --max-concurrent 20
 
     # Run a single task from a JSON file
     hud eval task.json
 
-    # Run multiple tasks from a JSON file with parallel execution
-    hud eval tasks.json --full --parallel
+    # Run multiple tasks from a JSON file
+    hud eval tasks.json --full
 
     # Run with OpenAI Operator agent
     hud eval hud-evals/OSWorld-Gold-Beta --agent openai
@@ -680,8 +632,6 @@ def eval_command(
     # Run with verbose output for debugging
     hud eval task.json --verbose
     """
-    from hud.settings import settings
-
     # Always configure basic logging so agent steps can be logged
     # Set to INFO by default for consistency with run_evaluation.py
     if very_verbose:
@@ -736,7 +686,11 @@ def eval_command(
 
     # Run evaluation
     if full:
-        asyncio.run(
+        import time
+
+        start_time = time.time()
+
+        results = asyncio.run(
             run_full_dataset(
                 source,
                 agent_type=agent,
@@ -744,14 +698,29 @@ def eval_command(
                 allowed_tools=allowed_tools_list,
                 max_concurrent=max_concurrent,
                 max_steps=max_steps,
-                parallel=parallel,
-                max_workers=max_workers,
-                max_concurrent_per_worker=max_concurrent_per_worker,
                 verbose=very_verbose or verbose,
                 vllm_base_url=vllm_base_url,
                 group_size=group_size,
             )
         )
+
+        elapsed = time.time() - start_time
+
+        # Print statistics (only for non-grouped mode)
+        if group_size == 1 and results:
+            hud_console.info("\n" + "=" * 50)
+            hud_console.success("📊 Evaluation Complete!")
+            hud_console.info("=" * 50)
+            hud_console.info(f"Total tasks: {len(results)}")
+            hud_console.info(f"Time elapsed: {elapsed:.2f} seconds")
+            hud_console.info(f"Throughput: {len(results) / elapsed:.2f} tasks/second")
+            hud_console.info(f"Execution mode: ASYNCIO (max_concurrent: {max_concurrent})")
+
+            # Count successes
+            successful = sum(1 for r in results if getattr(r, "reward", 0) > 0.7)
+            success_rate = 100 * successful / len(results)
+            hud_console.info(f"Successful tasks: {successful}/{len(results)} ({success_rate:.1f}%)")
+            hud_console.info("=" * 50)
     else:
         asyncio.run(
             run_single_task(
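
Note: a minimal sketch (not part of the diff) of how the refactored vLLM path above fits together; the URL and model name are simply the defaults visible in _build_vllm_config, repeated here as illustrative values.

from hud.agents.openai_chat_generic import GenericOpenAIChatAgent

# _build_vllm_config returns api_key, base_url, model_name, verbose and
# completion_kwargs; the agent is then constructed directly from that dict.
config = _build_vllm_config(
    vllm_base_url="http://localhost:8000/v1",  # example value; None falls back to this same default
    model="served-model",
    allowed_tools=None,
    verbose=False,
)
agent = GenericOpenAIChatAgent(**config)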
hud/cli/tests/test_analyze_module.py ADDED
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from hud.cli.analyze import (
+    analyze_environment,
+    analyze_environment_from_config,
+    analyze_environment_from_mcp_config,
+    display_interactive,
+    display_markdown,
+    parse_docker_command,
+)
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+# Mark entire module as asyncio to ensure async tests run with pytest-asyncio
+pytestmark = pytest.mark.asyncio
+
+
+def test_parse_docker_command():
+    cmd = ["docker", "run", "--rm", "-i", "img"]
+    cfg = parse_docker_command(cmd)
+    assert cfg == {"local": {"command": "docker", "args": ["run", "--rm", "-i", "img"]}}
+
+
+@pytest.mark.asyncio
+@patch("hud.cli.analyze.MCPClient")
+@patch("hud.cli.analyze.console")
+async def test_analyze_environment_success_json(mock_console, MockClient):
+    client = AsyncMock()
+    client.initialize.return_value = None
+    client.analyze_environment.return_value = {"tools": [], "resources": []}
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    await analyze_environment(["docker", "run", "img"], output_format="json", verbose=False)
+    assert client.initialize.awaited
+    assert client.analyze_environment.awaited
+    assert client.shutdown.awaited
+    assert mock_console.print_json.called
+
+
+@pytest.mark.asyncio
+@patch("hud.cli.analyze.MCPClient")
+@patch("hud.cli.analyze.console")
+async def test_analyze_environment_failure(mock_console, MockClient):
+    client = AsyncMock()
+    client.initialize.side_effect = RuntimeError("boom")
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    # Should swallow exception and return without raising
+    await analyze_environment(["docker", "run", "img"], output_format="json", verbose=True)
+    assert client.shutdown.awaited
+    assert mock_console.print_json.called is False
+
+
+def test_display_interactive_metadata_only(monkeypatch):
+    import hud.cli.analyze as mod
+
+    monkeypatch.setattr(mod, "console", MagicMock(), raising=False)
+    monkeypatch.setattr(mod, "hud_console", MagicMock(), raising=False)
+
+    analysis = {
+        "image": "img:latest",
+        "status": "cached",
+        "tool_count": 2,
+        "tools": [
+            {"name": "t1", "description": "d1", "inputSchema": {"type": "object"}},
+            {"name": "t2", "description": "d2"},
+        ],
+        "resources": [],
+    }
+    display_interactive(analysis)
+
+
+def test_display_markdown_both_paths(capsys):
+    # metadata-only
+    md_only = {"image": "img:latest", "tool_count": 0, "tools": [], "resources": []}
+    display_markdown(md_only)
+
+    # live metadata
+    live = {"metadata": {"servers": ["s1"], "initialized": True}, "tools": [], "resources": []}
+    display_markdown(live)
+
+    # Check that output was generated
+    captured = capsys.readouterr()
+    assert "MCP Environment Analysis" in captured.out
+
+
+@patch("hud.cli.analyze.MCPClient")
+async def test_analyze_environment_from_config(MockClient, tmp_path: Path):
+    client = AsyncMock()
+    client.initialize.return_value = None
+    client.analyze_environment.return_value = {"tools": [], "resources": []}
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    cfg = tmp_path / "mcp.json"
+    cfg.write_text('{"local": {"command": "docker", "args": ["run", "img"]}}')
+    await analyze_environment_from_config(cfg, output_format="json", verbose=False)
+    assert client.initialize.awaited and client.shutdown.awaited
+
+
+@patch("hud.cli.analyze.MCPClient")
+async def test_analyze_environment_from_mcp_config(MockClient):
+    client = AsyncMock()
+    client.initialize.return_value = None
+    client.analyze_environment.return_value = {"tools": [], "resources": []}
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    mcp_config = {"local": {"command": "docker", "args": ["run", "img"]}}
+    await analyze_environment_from_mcp_config(mcp_config, output_format="json", verbose=False)
+    assert client.initialize.awaited and client.shutdown.awaited
hud/cli/tests/test_build.py CHANGED
@@ -219,6 +219,17 @@ class TestAnalyzeMcpEnvironment:
         mock_tool.description = "Test tool"
         mock_tool.inputSchema = {"type": "object"}
 
+        # Prefer analyze_environment path (aligns with analyze CLI tests)
+        mock_client.analyze_environment = mock.AsyncMock(
+            return_value={
+                "metadata": {"servers": ["local"], "initialized": True},
+                "tools": [{"name": "test_tool", "description": "Test tool"}],
+                "hub_tools": {},
+                "resources": [],
+                "telemetry": {},
+            }
+        )
+        # Fallback still defined for completeness
         mock_client.list_tools.return_value = [mock_tool]
 
         result = await analyze_mcp_environment("test:latest")
@@ -237,7 +248,9 @@ class TestAnalyzeMcpEnvironment:
         mock_client_class.return_value = mock_client
         mock_client.initialize.side_effect = ConnectionError("Connection failed")
 
-        with pytest.raises(ConnectionError):
+        from hud.shared.exceptions import HudException
+
+        with pytest.raises(HudException, match="Connection failed"):
             await analyze_mcp_environment("test:latest")
 
     @mock.patch("hud.cli.build.MCPClient")
@@ -245,6 +258,15 @@ class TestAnalyzeMcpEnvironment:
         """Test analysis in verbose mode."""
         mock_client = mock.AsyncMock()
         mock_client_class.return_value = mock_client
+        mock_client.analyze_environment = mock.AsyncMock(
+            return_value={
+                "metadata": {"servers": ["local"], "initialized": True},
+                "tools": [],
+                "hub_tools": {},
+                "resources": [],
+                "telemetry": {},
+            }
+        )
         mock_client.list_tools.return_value = []
 
         # Just test that it runs without error in verbose mode
@@ -363,7 +385,7 @@ ENV API_KEY
         mock_run.return_value = mock_result
 
         # Run build
-        build_environment(str(env_dir), "test/env:latest")
+        build_environment(str(env_dir), "test-env:latest")
 
         # Check lock file was created
         lock_file = env_dir / "hud.lock.yaml"
@@ -373,7 +395,8 @@ ENV API_KEY
         with open(lock_file) as f:
             lock_data = yaml.safe_load(f)
 
-        assert lock_data["image"] == "test/env:latest@sha256:abc123"
+        assert lock_data["images"]["full"] == "test-env:0.1.0@sha256:abc123"
+        assert lock_data["images"]["local"] == "test-env:0.1.0"
         assert lock_data["build"]["version"] == "0.1.0"
         assert lock_data["environment"]["toolCount"] == 2
         assert len(lock_data["tools"]) == 2
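
For reference, a sketch of the hud.lock.yaml shape implied by the updated assertions above; it shows only the keys the test checks (as loaded via yaml.safe_load), with the fixture's values, and omits any other fields.

# Keys asserted by the test above; other lock-file fields are not shown here.
lock_data = {
    "images": {
        "full": "test-env:0.1.0@sha256:abc123",  # digest-pinned reference
        "local": "test-env:0.1.0",               # local tag without digest
    },
    "build": {"version": "0.1.0"},
    "environment": {"toolCount": 2},
    "tools": [...],  # two entries in this fixture
}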
hud/cli/tests/test_build_failure.py ADDED
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import patch
+
+import pytest
+import typer
+
+from hud.cli.build import build_environment
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+@patch("hud.cli.build.compute_source_hash", return_value="deadbeef")
+@patch(
+    "hud.cli.build.analyze_mcp_environment",
+    return_value={"initializeMs": 10, "toolCount": 0, "tools": []},
+)
+@patch("hud.cli.build.build_docker_image", return_value=True)
+def test_build_label_rebuild_failure(_bd, _an, _hash, tmp_path: Path, monkeypatch):
+    # Minimal environment dir
+    env = tmp_path / "env"
+    env.mkdir()
+    (env / "Dockerfile").write_text("FROM python:3.11")
+
+    # Ensure subprocess.run returns non-zero for the second build (label build)
+    import types
+
+    def run_side_effect(cmd, *a, **k):
+        # Return 0 for first docker build, 1 for label build
+        if isinstance(cmd, list) and cmd[:2] == ["docker", "build"] and "--label" in cmd:
+            return types.SimpleNamespace(returncode=1, stderr="boom")
+        return types.SimpleNamespace(returncode=0, stdout="")
+
+    monkeypatch.setenv("FASTMCP_DISABLE_BANNER", "1")
+    with (
+        patch("hud.cli.build.subprocess.run", side_effect=run_side_effect),
+        pytest.raises(typer.Exit),
+    ):
+        build_environment(str(env), verbose=False)
hud/cli/tests/test_build_module.py ADDED
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest import mock
+
+from hud.cli.build import (
+    extract_env_vars_from_dockerfile,
+    get_docker_image_digest,
+    get_docker_image_id,
+)
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+def test_extract_env_vars_from_dockerfile_complex(tmp_path: Path):
+    dockerfile = tmp_path / "Dockerfile"
+    dockerfile.write_text(
+        """
+FROM python:3.11
+ARG BUILD_TOKEN
+ARG DEFAULTED=1
+ENV RUNTIME_KEY
+ENV FROM_ARG=$BUILD_TOKEN
+ENV WITH_DEFAULT=val
+"""
+    )
+    required, optional = extract_env_vars_from_dockerfile(dockerfile)
+    # BUILD_TOKEN required (ARG without default)
+    assert "BUILD_TOKEN" in required
+    # RUNTIME_KEY required (ENV without value)
+    assert "RUNTIME_KEY" in required
+    # FROM_ARG references BUILD_TOKEN -> required
+    assert "FROM_ARG" in required
+    # DEFAULTED and WITH_DEFAULT should not be marked required by default
+    assert "DEFAULTED" not in required
+    assert "WITH_DEFAULT" not in required
+    assert optional == []
+
+
+@mock.patch("subprocess.run")
+def test_get_docker_image_digest_none(mock_run):
+    mock_run.return_value = mock.Mock(stdout="[]", returncode=0)
+    assert get_docker_image_digest("img") is None
+
+
+@mock.patch("subprocess.run")
+def test_get_docker_image_id_ok(mock_run):
+    mock_run.return_value = mock.Mock(stdout="sha256:abc", returncode=0)
+    assert get_docker_image_id("img") == "sha256:abc"