hud-python 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (63)
  1. hud/__main__.py +8 -0
  2. hud/agents/base.py +7 -8
  3. hud/agents/langchain.py +2 -2
  4. hud/agents/tests/test_openai.py +3 -1
  5. hud/cli/__init__.py +106 -51
  6. hud/cli/build.py +121 -71
  7. hud/cli/debug.py +2 -2
  8. hud/cli/{mcp_server.py → dev.py} +60 -25
  9. hud/cli/eval.py +148 -68
  10. hud/cli/init.py +0 -1
  11. hud/cli/list_func.py +72 -71
  12. hud/cli/pull.py +1 -2
  13. hud/cli/push.py +35 -23
  14. hud/cli/remove.py +35 -41
  15. hud/cli/tests/test_analyze.py +2 -1
  16. hud/cli/tests/test_analyze_metadata.py +42 -49
  17. hud/cli/tests/test_build.py +28 -52
  18. hud/cli/tests/test_cursor.py +1 -1
  19. hud/cli/tests/test_debug.py +1 -1
  20. hud/cli/tests/test_list_func.py +75 -64
  21. hud/cli/tests/test_main_module.py +30 -0
  22. hud/cli/tests/test_mcp_server.py +3 -3
  23. hud/cli/tests/test_pull.py +30 -61
  24. hud/cli/tests/test_push.py +70 -89
  25. hud/cli/tests/test_registry.py +36 -38
  26. hud/cli/tests/test_utils.py +1 -1
  27. hud/cli/utils/__init__.py +1 -0
  28. hud/cli/{docker_utils.py → utils/docker.py} +36 -0
  29. hud/cli/{env_utils.py → utils/environment.py} +7 -7
  30. hud/cli/{interactive.py → utils/interactive.py} +91 -19
  31. hud/cli/{analyze_metadata.py → utils/metadata.py} +12 -8
  32. hud/cli/{registry.py → utils/registry.py} +28 -30
  33. hud/cli/{remote_runner.py → utils/remote_runner.py} +1 -1
  34. hud/cli/utils/runner.py +134 -0
  35. hud/cli/utils/server.py +250 -0
  36. hud/clients/base.py +1 -1
  37. hud/clients/fastmcp.py +7 -5
  38. hud/clients/mcp_use.py +8 -6
  39. hud/server/server.py +34 -4
  40. hud/shared/exceptions.py +11 -0
  41. hud/shared/tests/test_exceptions.py +22 -0
  42. hud/telemetry/tests/__init__.py +0 -0
  43. hud/telemetry/tests/test_replay.py +40 -0
  44. hud/telemetry/tests/test_trace.py +63 -0
  45. hud/tools/base.py +20 -3
  46. hud/tools/computer/hud.py +15 -6
  47. hud/tools/executors/tests/test_base_executor.py +27 -0
  48. hud/tools/response.py +15 -4
  49. hud/tools/tests/test_response.py +60 -0
  50. hud/tools/tests/test_tools_init.py +49 -0
  51. hud/utils/design.py +19 -8
  52. hud/utils/mcp.py +17 -5
  53. hud/utils/tests/test_mcp.py +112 -0
  54. hud/utils/tests/test_version.py +1 -1
  55. hud/version.py +1 -1
  56. {hud_python-0.4.10.dist-info → hud_python-0.4.12.dist-info}/METADATA +14 -10
  57. {hud_python-0.4.10.dist-info → hud_python-0.4.12.dist-info}/RECORD +62 -52
  58. hud/cli/runner.py +0 -160
  59. /hud/cli/{cursor.py → utils/cursor.py} +0 -0
  60. /hud/cli/{utils.py → utils/logging.py} +0 -0
  61. {hud_python-0.4.10.dist-info → hud_python-0.4.12.dist-info}/WHEEL +0 -0
  62. {hud_python-0.4.10.dist-info → hud_python-0.4.12.dist-info}/entry_points.txt +0 -0
  63. {hud_python-0.4.10.dist-info → hud_python-0.4.12.dist-info}/licenses/LICENSE +0 -0
hud/__main__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Allow running CLI with python -m hud."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from hud.cli import main
6
+
7
+ if __name__ == "__main__":
8
+ main()
hud/agents/base.py CHANGED
@@ -306,7 +306,7 @@ class MCPAgent(ABC):
306
306
  if decision == "STOP":
307
307
  # Try to submit response through lifecycle tool
308
308
  await self._maybe_submit_response(response, messages)
309
-
309
+
310
310
  logger.info("Stopping execution")
311
311
  final_response = response
312
312
  break
@@ -487,7 +487,7 @@ class MCPAgent(ABC):
487
487
  self._available_tools.append(tool)
488
488
  # Simplified mapping - just tool name to tool
489
489
  self._tool_map[tool.name] = tool
490
-
490
+
491
491
  # Auto-detect response tool as a lifecycle tool
492
492
  if tool.name == "response" and "response" not in self.lifecycle_tools:
493
493
  logger.debug("Auto-detected 'response' tool as a lifecycle tool")
@@ -495,7 +495,7 @@ class MCPAgent(ABC):
495
495
 
496
496
  async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
497
497
  """Submit response through lifecycle tool if available.
498
-
498
+
499
499
  Args:
500
500
  response: The agent's response
501
501
  messages: The current message history (will be modified in-place)
@@ -506,17 +506,16 @@ class MCPAgent(ABC):
506
506
  try:
507
507
  # Call the response tool with the agent's response
508
508
  response_tool_call = MCPToolCall(
509
- name="response",
510
- arguments={"response": response.content, "messages": messages}
509
+ name="response", arguments={"response": response.content, "messages": messages}
511
510
  )
512
511
  response_results = await self.call_tools(response_tool_call)
513
-
512
+
514
513
  # Format and add the response tool results to messages
515
514
  response_messages = await self.format_tool_results(
516
515
  [response_tool_call], response_results
517
516
  )
518
517
  messages.extend(response_messages)
519
-
518
+
520
519
  # Mark the task as done
521
520
  logger.info("Response lifecycle tool executed, marking task as done")
522
521
  except Exception as e:
@@ -579,7 +578,7 @@ class MCPAgent(ABC):
579
578
  logger.warning("Failed to close auto-created trace: %s", e)
580
579
  finally:
581
580
  self._auto_trace_cm = None
582
-
581
+
583
582
  # Clean up auto-created client
584
583
  if self._auto_created_client and self.mcp_client:
585
584
  try:
hud/agents/langchain.py CHANGED
@@ -15,10 +15,10 @@ import hud
15
15
  if TYPE_CHECKING:
16
16
  from langchain.schema.language_model import BaseLanguageModel
17
17
  from langchain_core.tools import BaseTool
18
- from mcp_use.adapters.langchain_adapter import LangChainAdapter
18
+ from mcp_use.adapters.langchain_adapter import LangChainAdapter # type: ignore[attr-defined]
19
19
 
20
20
  try:
21
- from mcp_use.adapters.langchain_adapter import LangChainAdapter
21
+ from mcp_use.adapters.langchain_adapter import LangChainAdapter # type: ignore[attr-defined]
22
22
  except ImportError:
23
23
  LangChainAdapter = None # type: ignore[misc, assignment]
24
24
 
hud/agents/tests/test_openai.py CHANGED
@@ -17,7 +17,9 @@ class TestOperatorAgent:
17
17
  @pytest.fixture
18
18
  def mock_mcp_client(self):
19
19
  """Create a mock MCP client."""
20
- mcp_client = MagicMock()
20
+ mcp_client = AsyncMock()
21
+ # Set up the mcp_config attribute as a regular dict, not a coroutine
22
+ mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
21
23
  return mcp_client
22
24
 
23
25
  @pytest.fixture
hud/cli/__init__.py CHANGED
@@ -4,15 +4,15 @@ from __future__ import annotations
4
4
 
5
5
  import asyncio
6
6
  import json
7
- import os
8
7
  import sys
9
- from pathlib import Path # noqa: TC003
8
+ from pathlib import Path
10
9
 
11
10
  import typer
12
11
  from rich.console import Console
13
12
  from rich.panel import Panel
14
13
  from rich.table import Table
15
14
 
15
+ from . import list_func as list_module
16
16
  from .analyze import (
17
17
  analyze_environment,
18
18
  analyze_environment_from_config,
@@ -20,15 +20,14 @@ from .analyze import (
20
20
  )
21
21
  from .build import build_command
22
22
  from .clone import clone_repository, get_clone_message, print_error, print_tutorial
23
- from .cursor import get_cursor_config_path, list_cursor_servers, parse_cursor_config
24
23
  from .debug import debug_mcp_stdio
24
+ from .dev import run_mcp_dev_server
25
25
  from .init import create_environment
26
- from . import list_func as list_module
27
- from .mcp_server import run_mcp_dev_server
28
26
  from .pull import pull_command
29
27
  from .push import push_command
30
28
  from .remove import remove_command
31
- from .utils import CaptureLogger
29
+ from .utils.cursor import get_cursor_config_path, list_cursor_servers, parse_cursor_config
30
+ from .utils.logging import CaptureLogger
32
31
 
33
32
  # Create the main Typer app
34
33
  app = typer.Typer(
@@ -113,7 +112,7 @@ def analyze(
113
112
  asyncio.run(analyze_environment(docker_cmd, output_format, verbose))
114
113
  else:
115
114
  # Fast mode - analyze from metadata
116
- from .analyze_metadata import analyze_from_metadata
115
+ from .utils.metadata import analyze_from_metadata
117
116
 
118
117
  asyncio.run(analyze_from_metadata(image, output_format, verbose))
119
118
  else:
@@ -175,11 +174,17 @@ def debug(
175
174
  hud debug . --max-phase 3 # Stop after phase 3
176
175
  """
177
176
  # Import here to avoid circular imports
178
- from .env_utils import get_image_name, is_environment_directory, build_environment, image_exists
179
177
  from hud.utils.design import HUDDesign
180
-
178
+
179
+ from .utils.environment import (
180
+ build_environment,
181
+ get_image_name,
182
+ image_exists,
183
+ is_environment_directory,
184
+ )
185
+
181
186
  design = HUDDesign()
182
-
187
+
183
188
  # Determine the command to run
184
189
  command = None
185
190
  docker_args = []
@@ -202,18 +207,18 @@ def debug(
202
207
  elif params:
203
208
  first_param = params[0]
204
209
  docker_args = params[1:] if len(params) > 1 else []
205
-
210
+
206
211
  # Check if it's a directory
207
212
  if Path(first_param).exists() and is_environment_directory(first_param):
208
213
  # Directory mode - like hud dev
209
214
  directory = first_param
210
-
215
+
211
216
  # Get or generate image name
212
217
  image_name, source = get_image_name(directory)
213
-
218
+
214
219
  if source == "auto":
215
220
  design.info(f"Auto-generated image name: {image_name}")
216
-
221
+
217
222
  # Build if requested or if image doesn't exist
218
223
  if build or not image_exists(image_name):
219
224
  if not build and not image_exists(image_name):
@@ -221,11 +226,10 @@ def debug(
221
226
  build = True
222
227
  else:
223
228
  raise typer.Exit(1)
224
-
225
- if build:
226
- if not build_environment(directory, image_name):
227
- raise typer.Exit(1)
228
-
229
+
230
+ if build and not build_environment(directory, image_name):
231
+ raise typer.Exit(1)
232
+
229
233
  # Build Docker command
230
234
  command = ["docker", "run", "--rm", "-i", *docker_args, image_name]
231
235
  else:
@@ -233,7 +237,9 @@ def debug(
233
237
  image = first_param
234
238
  command = ["docker", "run", "--rm", "-i", *docker_args, image]
235
239
  else:
236
- console.print("[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]")
240
+ console.print(
241
+ "[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
242
+ )
237
243
  console.print("\nExamples:")
238
244
  console.print(" hud debug . # Debug current directory")
239
245
  console.print(" hud debug environments/browser # Debug specific directory")
@@ -448,6 +454,11 @@ def run(
448
454
  "-v",
449
455
  help="Show detailed output",
450
456
  ),
457
+ interactive: bool = typer.Option(
458
+ False,
459
+ "--interactive",
460
+ help="Launch interactive testing mode (HTTP transport only)",
461
+ ),
451
462
  ) -> None:
452
463
  """🚀 Run MCP server locally or remotely.
453
464
 
@@ -462,6 +473,10 @@ def run(
462
473
  hud run --local hud-text-2048:latest
463
474
  hud run --local my-server:v1 -e API_KEY=xxx
464
475
  hud run --local my-server:v1 --transport http
476
+
477
+ Interactive Testing (local only):
478
+ hud run --local --interactive --transport http hud-text-2048:latest
479
+ hud run --local --interactive --transport http --port 9000 my-server:v1
465
480
  """
466
481
  if not params:
467
482
  typer.echo("❌ Docker image is required")
@@ -479,18 +494,28 @@ def run(
479
494
  # Default to remote if not explicitly local
480
495
  is_local = local and not remote
481
496
 
497
+ # Check for interactive mode restrictions
498
+ if interactive:
499
+ if transport != "http":
500
+ typer.echo("❌ Interactive mode requires HTTP transport (use --transport http)")
501
+ raise typer.Exit(1)
502
+ if not is_local:
503
+ typer.echo("❌ Interactive mode is only available for local execution (use --local)")
504
+ raise typer.Exit(1)
505
+
482
506
  if is_local:
483
507
  # Local Docker execution
484
- from .runner import run_mcp_server
508
+ from .utils.runner import run_mcp_server
485
509
 
486
- run_mcp_server(image, docker_args, transport, port, verbose)
510
+ run_mcp_server(image, docker_args, transport, port, verbose, interactive)
487
511
  else:
488
512
  # Remote execution via proxy
489
- from .remote_runner import run_remote_server
513
+ from .utils.remote_runner import run_remote_server
490
514
 
491
515
  # Get URL from options or environment
492
516
  if not url:
493
517
  from hud.settings import settings
518
+
494
519
  url = settings.hud_mcp_url
495
520
 
496
521
  run_remote_server(image, docker_args, transport, port, url, api_key, run_id, verbose)
@@ -534,9 +559,12 @@ def clone(
534
559
  raise typer.Exit(1)
535
560
 
536
561
 
537
- @app.command()
562
+ @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
538
563
  def build(
539
- directory: str = typer.Argument(".", help="Environment directory to build"),
564
+ params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
565
+ None,
566
+ help="Environment directory followed by optional arguments (e.g., '. -e API_KEY=secret')",
567
+ ),
540
568
  tag: str | None = typer.Option(
541
569
  None, "--tag", "-t", help="Docker image tag (default: from pyproject.toml)"
542
570
  ),
@@ -552,11 +580,47 @@ def build(
552
580
 
553
581
  Examples:
554
582
  hud build # Build current directory
555
- hud build environments/text_2048
556
- hud build . --tag my-env:v1.0
583
+ hud build environments/text_2048 -e API_KEY=secret
584
+ hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
557
585
  hud build . --no-cache # Force rebuild
558
586
  """
559
- build_command(directory, tag, no_cache, verbose)
587
+ # Parse directory and extra arguments
588
+ if params:
589
+ directory = params[0]
590
+ extra_args = params[1:] if len(params) > 1 else []
591
+ else:
592
+ directory = "."
593
+ extra_args = []
594
+
595
+ # Parse environment variables from extra args
596
+ env_vars = {}
597
+ i = 0
598
+ while i < len(extra_args):
599
+ if extra_args[i] == "-e" and i + 1 < len(extra_args):
600
+ # Parse -e KEY=VALUE format
601
+ env_arg = extra_args[i + 1]
602
+ if "=" in env_arg:
603
+ key, value = env_arg.split("=", 1)
604
+ env_vars[key] = value
605
+ i += 2
606
+ elif extra_args[i].startswith("--env="):
607
+ # Parse --env=KEY=VALUE format
608
+ env_arg = extra_args[i][6:] # Remove --env=
609
+ if "=" in env_arg:
610
+ key, value = env_arg.split("=", 1)
611
+ env_vars[key] = value
612
+ i += 1
613
+ elif extra_args[i] == "--env" and i + 1 < len(extra_args):
614
+ # Parse --env KEY=VALUE format
615
+ env_arg = extra_args[i + 1]
616
+ if "=" in env_arg:
617
+ key, value = env_arg.split("=", 1)
618
+ env_vars[key] = value
619
+ i += 2
620
+ else:
621
+ i += 1
622
+
623
+ build_command(directory, tag, no_cache, verbose, env_vars)
560
624
 
561
625
 
562
626
  @app.command()
@@ -615,20 +679,14 @@ def list_environments(
615
679
  filter_name: str | None = typer.Option(
616
680
  None, "--filter", "-f", help="Filter environments by name (case-insensitive)"
617
681
  ),
618
- json_output: bool = typer.Option(
619
- False, "--json", help="Output as JSON"
620
- ),
621
- show_all: bool = typer.Option(
622
- False, "--all", "-a", help="Show all columns including digest"
623
- ),
624
- verbose: bool = typer.Option(
625
- False, "--verbose", "-v", help="Show detailed output"
626
- ),
682
+ json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
683
+ show_all: bool = typer.Option(False, "--all", "-a", help="Show all columns including digest"),
684
+ verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
627
685
  ) -> None:
628
686
  """📋 List all HUD environments in local registry.
629
-
687
+
630
688
  Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
631
-
689
+
632
690
  Examples:
633
691
  hud list # List all environments
634
692
  hud list --filter text # Filter by name
@@ -642,21 +700,16 @@ def list_environments(
642
700
  @app.command()
643
701
  def remove(
644
702
  target: str | None = typer.Argument(
645
- None,
646
- help="Environment to remove (digest, name, or 'all' for all environments)"
647
- ),
648
- yes: bool = typer.Option(
649
- False, "--yes", "-y", help="Skip confirmation prompt"
650
- ),
651
- verbose: bool = typer.Option(
652
- False, "--verbose", "-v", help="Show detailed output"
703
+ None, help="Environment to remove (digest, name, or 'all' for all environments)"
653
704
  ),
705
+ yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
706
+ verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
654
707
  ) -> None:
655
708
  """🗑️ Remove HUD environments from local registry.
656
-
709
+
657
710
  Removes environment metadata from ~/.hud/envs/
658
711
  Note: This does not remove the Docker images.
659
-
712
+
660
713
  Examples:
661
714
  hud remove abc123 # Remove by digest
662
715
  hud remove text_2048 # Remove by name
@@ -740,22 +793,24 @@ def eval(
740
793
  valid_agents = ["claude", "openai"]
741
794
  if agent not in valid_agents:
742
795
  from hud.utils.design import HUDDesign
796
+
743
797
  design = HUDDesign()
744
798
  design.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
745
799
  raise typer.Exit(1)
746
-
800
+
747
801
  # Import eval_command lazily to avoid importing agent dependencies
748
802
  try:
749
803
  from .eval import eval_command
750
804
  except ImportError as e:
751
805
  from hud.utils.design import HUDDesign
806
+
752
807
  design = HUDDesign()
753
808
  design.error(
754
809
  "Evaluation dependencies are not installed. "
755
810
  "Please install with: pip install 'hud-python[agent]'"
756
811
  )
757
812
  raise typer.Exit(1) from e
758
-
813
+
759
814
  # Run the command
760
815
  eval_command(
761
816
  source=source,