hud-python 0.4.36__py3-none-any.whl → 0.4.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hud-python might be problematic.

Files changed (43)
  1. hud/agents/__init__.py +2 -0
  2. hud/agents/lite_llm.py +72 -0
  3. hud/agents/openai_chat_generic.py +21 -7
  4. hud/cli/__init__.py +19 -4
  5. hud/cli/build.py +17 -2
  6. hud/cli/dev.py +1 -1
  7. hud/cli/eval.py +93 -13
  8. hud/cli/flows/tasks.py +197 -65
  9. hud/cli/push.py +9 -0
  10. hud/cli/rl/__init__.py +14 -4
  11. hud/cli/rl/celebrate.py +187 -0
  12. hud/cli/rl/config.py +15 -8
  13. hud/cli/rl/local_runner.py +44 -20
  14. hud/cli/rl/remote_runner.py +163 -86
  15. hud/cli/rl/viewer.py +141 -0
  16. hud/cli/rl/wait_utils.py +89 -0
  17. hud/cli/utils/env_check.py +196 -0
  18. hud/cli/utils/source_hash.py +108 -0
  19. hud/clients/base.py +1 -1
  20. hud/clients/fastmcp.py +1 -1
  21. hud/otel/config.py +1 -1
  22. hud/otel/context.py +2 -2
  23. hud/rl/vllm_adapter.py +1 -1
  24. hud/server/server.py +84 -13
  25. hud/server/tests/test_add_tool.py +60 -0
  26. hud/server/tests/test_context.py +128 -0
  27. hud/server/tests/test_mcp_server_handlers.py +44 -0
  28. hud/server/tests/test_mcp_server_integration.py +405 -0
  29. hud/server/tests/test_mcp_server_more.py +247 -0
  30. hud/server/tests/test_run_wrapper.py +53 -0
  31. hud/server/tests/test_server_extra.py +166 -0
  32. hud/server/tests/test_sigterm_runner.py +78 -0
  33. hud/shared/hints.py +1 -1
  34. hud/telemetry/job.py +2 -2
  35. hud/types.py +9 -2
  36. hud/utils/tasks.py +32 -24
  37. hud/utils/tests/test_version.py +1 -1
  38. hud/version.py +1 -1
  39. {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/METADATA +14 -12
  40. {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/RECORD +43 -29
  41. {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/WHEEL +0 -0
  42. {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/entry_points.txt +0 -0
  43. {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/licenses/LICENSE +0 -0
hud/agents/__init__.py CHANGED
@@ -2,12 +2,14 @@ from __future__ import annotations
 
 from .base import MCPAgent
 from .claude import ClaudeAgent
+from .lite_llm import LiteAgent
 from .openai import OperatorAgent
 from .openai_chat_generic import GenericOpenAIChatAgent
 
 __all__ = [
     "ClaudeAgent",
     "GenericOpenAIChatAgent",
+    "LiteAgent",
     "MCPAgent",
     "OperatorAgent",
 ]
hud/agents/lite_llm.py ADDED
@@ -0,0 +1,72 @@
+"""LiteLLM MCP Agent implementation.
+
+Same OpenAI chat-completions shape + MCP tool plumbing,
+but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, ClassVar
+
+import litellm
+
+from .openai_chat_generic import GenericOpenAIChatAgent
+
+logger = logging.getLogger(__name__)
+
+# Prefer LiteLLM's built-in MCP -> OpenAI tool transformer (handles Bedrock nuances)
+try:
+    from litellm.experimental_mcp_client.tools import (
+        transform_mcp_tool_to_openai_tool,
+    )
+except Exception:  # pragma: no cover - optional dependency
+    transform_mcp_tool_to_openai_tool = None  # type: ignore
+
+
+class LiteAgent(GenericOpenAIChatAgent):
+    """
+    Same OpenAI chat-completions shape + MCP tool plumbing,
+    but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
+    """
+
+    metadata: ClassVar[dict[str, Any]] = {}
+
+    def __init__(
+        self,
+        *,
+        model_name: str = "gpt-4o-mini",
+        completion_kwargs: dict[str, Any] | None = None,
+        **agent_kwargs: Any,
+    ) -> None:
+        # We don't need an OpenAI client; pass None
+        super().__init__(
+            openai_client=None,
+            model_name=model_name,
+            completion_kwargs=completion_kwargs,
+            **agent_kwargs,
+        )
+
+    def get_tool_schemas(self) -> list[dict]:
+        # Prefer LiteLLM's stricter transformer (handles Bedrock & friends)
+        if transform_mcp_tool_to_openai_tool is not None:
+            return [
+                transform_mcp_tool_to_openai_tool(t)  # returns ChatCompletionToolParam-like dict
+                for t in self.get_available_tools()
+            ]
+        # Fallback to the generic OpenAI sanitizer
+        return GenericOpenAIChatAgent.get_tool_schemas(self)
+
+    async def _invoke_chat_completion(
+        self,
+        *,
+        messages: list[Any],
+        tools: list[dict] | None,
+        extra: dict[str, Any],
+    ):
+        return await litellm.acompletion(
+            model=self.model_name,
+            messages=messages,
+            tools=tools or None,  # LiteLLM tolerates None better than []
+            **extra,
+        )
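
For orientation, a minimal usage sketch (not part of this diff): LiteAgent is constructed the same way build_agent() in hud/cli/eval.py does below, and completion_kwargs are merged into litellm.acompletion after the protected keys are filtered out. The model string and credentials here are assumptions; LiteLLM reads provider keys such as OPENAI_API_KEY from the environment.

from hud.agents import LiteAgent

# Sketch, assuming standard LiteLLM environment-based credentials.
agent = LiteAgent(
    model_name="gpt-4o-mini",                # any LiteLLM-routed model string
    completion_kwargs={"temperature": 0.0},  # forwarded to litellm.acompletion(**extra)
)
# The CLI then drives this agent like any other backend; see
# run_single_task / run_full_dataset in hud/cli/eval.py further down.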
hud/agents/openai_chat_generic.py CHANGED
@@ -42,7 +42,7 @@ class GenericOpenAIChatAgent(MCPAgent):
     def __init__(
         self,
         *,
-        openai_client: AsyncOpenAI,
+        openai_client: AsyncOpenAI | None,
         model_name: str = "gpt-4o-mini",
         completion_kwargs: dict[str, Any] | None = None,
         **agent_kwargs: Any,
@@ -171,6 +171,23 @@ class GenericOpenAIChatAgent(MCPAgent):
             openai_tools.append(openai_tool)
         return openai_tools
 
+    async def _invoke_chat_completion(
+        self,
+        *,
+        messages: list[Any],
+        tools: list[dict] | None,
+        extra: dict[str, Any],
+    ):
+        if self.oai is None:
+            raise ValueError("openai_client is required for GenericOpenAIChatAgent")
+        # default transport = OpenAI SDK
+        return await self.oai.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            tools=tools,  # already ChatCompletionToolParam-shaped
+            **extra,
+        )
+
     @instrument(
         span_type="agent",
         record_args=False,
@@ -180,17 +197,14 @@ class GenericOpenAIChatAgent(MCPAgent):
         """Send chat request to OpenAI and convert the response."""
 
         # Convert MCP tool schemas to OpenAI format
-        mcp_schemas = self.get_tool_schemas()
+        tools = cast("list[ChatCompletionToolParam]", self.get_tool_schemas())
 
         protected_keys = {"model", "messages", "tools"}
         extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
 
         try:
-            response = await self.oai.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
-                **extra,
+            response = await self._invoke_chat_completion(
+                messages=messages, tools=tools, extra=extra
             )
         except Exception as e:
             error_content = f"Error getting response {e}"
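
The new _invoke_chat_completion hook is the extension seam that makes LiteAgent possible: a subclass swaps the transport while inheriting the message formatting, tool conversion, and error handling. A hedged sketch of another use of the same hook, assuming only the signature shown above (the retry policy is invented for illustration):

import asyncio
from typing import Any

from hud.agents import GenericOpenAIChatAgent


class RetryingChatAgent(GenericOpenAIChatAgent):
    """Illustrative subclass: retry the default OpenAI transport on failure."""

    async def _invoke_chat_completion(
        self, *, messages: list[Any], tools: list[dict] | None, extra: dict[str, Any]
    ):
        last_exc: Exception | None = None
        for attempt in range(3):  # three attempts with linear backoff
            try:
                return await super()._invoke_chat_completion(
                    messages=messages, tools=tools, extra=extra
                )
            except Exception as exc:
                last_exc = exc
                await asyncio.sleep(attempt + 1)
        assert last_exc is not None
        raise last_exc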
hud/cli/__init__.py CHANGED
@@ -912,7 +912,7 @@ def eval(
     agent: str | None = typer.Argument(
         None,
         help=(
-            "Agent backend to use (claude, openai, or vllm). If not provided, will prompt interactively."  # noqa: E501
+            "Agent backend to use (claude, openai, vllm, or litellm). If not provided, will prompt interactively."  # noqa: E501
         ),
     ),
     full: bool = typer.Option(
@@ -960,6 +960,12 @@ def eval(
         "--verbose",
         help="Enable verbose output from the agent",
     ),
+    very_verbose: bool = typer.Option(
+        False,
+        "--very-verbose",
+        "-vv",
+        help="Enable debug-level logs for maximum visibility",
+    ),
     vllm_base_url: str | None = typer.Option(
         None,
         "--vllm-base-url",
@@ -1025,13 +1031,14 @@ def eval(
                 {"name": "Claude 4 Sonnet", "value": "claude"},
                 {"name": "OpenAI Computer Use", "value": "openai"},
                 {"name": "vLLM (Local Server)", "value": "vllm"},
+                {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
             ]
         )
 
         agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
 
     # Handle HUD model selection
-    if agent and agent not in ["claude", "openai", "vllm"]:
+    if agent and agent not in ["claude", "openai", "vllm", "litellm"]:
         # Find remote model name
         model = agent
         if not vllm_base_url:
@@ -1052,7 +1059,7 @@ def eval(
         hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
 
     # Validate agent choice
-    valid_agents = ["claude", "openai", "vllm"]
+    valid_agents = ["claude", "openai", "vllm", "litellm"]
     if agent not in valid_agents:
         hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
         raise typer.Exit(1)
@@ -1070,6 +1077,7 @@ def eval(
         max_workers=max_workers,
         max_concurrent_per_worker=max_concurrent_per_worker,
         verbose=verbose,
+        very_verbose=very_verbose,
         vllm_base_url=vllm_base_url,
         group_size=group_size,
     )
@@ -1119,7 +1127,7 @@ def rl(
     ),
     model: str | None = typer.Argument(
         None,
-        help="Model to train (default: interactive selection)",
+        help="Model to train from https://hud.so/models (default: interactive selection)",
     ),
     config_file: Path | None = typer.Option(  # noqa: B008
         None,
@@ -1159,6 +1167,12 @@ def rl(
         "--ddp-gpus",
         help="Specific GPUs for DDP (e.g., '0,1,2,3')",
     ),
+    yes: bool = typer.Option(
+        False,
+        "--yes",
+        "-y",
+        help="Auto-accept all prompts and use defaults (lazy mode)",
+    ),
     vllm_gpu: int | None = typer.Option(
         None,
         "--vllm-gpu",
@@ -1180,6 +1194,7 @@ def rl(
         no_ddp=no_ddp,
         ddp_gpus=ddp_gpus,
         vllm_gpu=vllm_gpu,
+        yes=yes,
     )
 
 
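Taken together, the new flags surface like this on the command line (illustrative invocations: the task file name is a placeholder and the argument order is inferred from the signatures above):

hud eval tasks.json litellm -vv   # LiteLLM backend plus debug-level logs
hud rl tasks.json --yes           # auto-accept RL setup prompts with defaults
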
hud/cli/build.py CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
 import hashlib
 import subprocess
 import time
@@ -13,6 +14,7 @@ from typing import Any
 import typer
 import yaml
 
+from hud.cli.utils.source_hash import compute_source_hash, list_source_files
 from hud.clients import MCPClient
 from hud.utils.hud_console import HUDConsole
 from hud.version import __version__ as hud_version
@@ -341,10 +343,11 @@ def build_environment(
     required_env, optional_env = extract_env_vars_from_dockerfile(dockerfile_path)
 
     # Merge user-provided env vars with detected ones
-    provided_env_vars = {}
+    provided_env_vars: dict[str, str] = {}
     missing_required = []
     if env_vars:
-        provided_env_vars = env_vars.copy()
+        # Use placeholders in lock file for any provided values to avoid storing secrets
+        provided_env_vars = {k: f"${{{k}}}" for k in env_vars}
         # Track which required vars are still missing
         missing_required = [e for e in required_env if e not in env_vars]
 
@@ -384,6 +387,8 @@ def build_environment(
             "hudVersion": hud_version,
             "directory": str(env_dir.name),
             "version": new_version,  # Internal environment version
+            # Fast source fingerprint for change detection
+            "sourceHash": compute_source_hash(env_dir),
         },
         "environment": {
             "initializeMs": analysis["initializeMs"],
@@ -424,6 +429,16 @@ def build_environment(
     with open(lock_path, "w") as f:
         yaml.dump(lock_content, f, default_flow_style=False, sort_keys=False)
 
+    # Also write the file list we hashed for transparency (non-essential)
+    with contextlib.suppress(Exception):
+        files = [
+            str(p.resolve().relative_to(env_dir)).replace("\\", "/")
+            for p in list_source_files(env_dir)
+        ]
+        lock_content["build"]["sourceFiles"] = files
+        with open(lock_path, "w") as f:
+            yaml.dump(lock_content, f, default_flow_style=False, sort_keys=False)
+
     hud_console.success("Created lock file: hud.lock.yaml")
 
     # Calculate lock file hash
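
hud/cli/utils/source_hash.py itself is a new file in this release (+108 lines) and its body is not shown in the diff; the sketch below only illustrates the technique the lock file relies on, a deterministic fingerprint over an environment's source tree. The ignore set and hashing scheme are assumptions, not the actual implementation.

import hashlib
from pathlib import Path


def list_source_files(env_dir: Path) -> list[Path]:
    # Assumed ignore set; the real module may differ.
    skip = {".git", "__pycache__", ".venv", "node_modules"}
    return sorted(
        p
        for p in env_dir.rglob("*")
        if p.is_file() and not (skip & set(p.relative_to(env_dir).parts))
    )


def compute_source_hash(env_dir: Path) -> str:
    # Hash relative paths plus contents so renames and edits both change the digest.
    digest = hashlib.sha256()
    for p in list_source_files(env_dir):
        digest.update(str(p.relative_to(env_dir)).replace("\\", "/").encode())
        digest.update(p.read_bytes())
    return digest.hexdigest()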
hud/cli/dev.py CHANGED
@@ -530,7 +530,7 @@ async def start_mcp_proxy(
                 stderr=asyncio.subprocess.DEVNULL,
             )
             await stop_result.communicate()
-            hud_console.success("Container stopped successfully")
+            hud_console.success("Container stopped successfully")
             container_stopped = True
         except Exception as e:
             hud_console.warning(f"Failed to stop container: {e}")
hud/cli/eval.py CHANGED
@@ -5,15 +5,18 @@ from __future__ import annotations
 import asyncio
 import logging
 from pathlib import Path
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 import typer
 
 import hud
+from hud.cli.utils.env_check import ensure_built, find_environment_dir
 from hud.settings import settings
 from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
 from hud.utils.hud_console import HUDConsole
 
+if TYPE_CHECKING:
+    from hud.types import Task
 logger = logging.getLogger(__name__)
 hud_console = HUDConsole()
 
@@ -27,7 +30,7 @@ def get_available_models() -> list[dict[str, str | None]]:
     try:
         from hud.cli.rl import rl_api
 
-        hud_console.info("Fetching your models from https://app.hud.so/models")
+        hud_console.info("Fetching your models from https://hud.so/models")
         models = rl_api.list_models()
 
         # Filter for ready models only and sort by recency
@@ -66,7 +69,7 @@ def get_available_models() -> list[dict[str, str | None]]:
 
 
 def build_agent(
-    agent_type: Literal["claude", "openai", "vllm"],
+    agent_type: Literal["claude", "openai", "vllm", "litellm"],
     *,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
@@ -138,6 +141,22 @@ def build_agent(
         else:
             return OperatorAgent(verbose=verbose)
 
+    elif agent_type == "litellm":
+        try:
+            from hud.agents.lite_llm import LiteAgent
+        except ImportError as e:
+            hud_console.error(
+                "LiteLLM agent dependencies are not installed. "
+                "Please install with: pip install 'hud-python[agent]'"
+            )
+            raise typer.Exit(1) from e
+
+        return LiteAgent(
+            model_name=model or "gpt-4o-mini",
+            allowed_tools=allowed_tools,
+            verbose=verbose,
+        )
+
     # Fallback Claude agent (Anthropic)
     try:
         from hud.agents import ClaudeAgent
@@ -166,7 +185,7 @@ def build_agent(
 async def run_single_task(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm"] = "claude",
+    agent_type: Literal["claude", "openai", "vllm", "litellm"] = "claude",
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_steps: int = 10,
@@ -192,7 +211,16 @@ async def run_single_task(
         hud_console.info("📊 Loading task file…")
 
         # Use unified loader for both JSON and JSONL
-        tasks = load_tasks(str(path))
+        tasks: list[Task] = load_tasks(str(path))  # type: ignore[assignment]
+
+        # If tasks reference a local environment (nearby), ensure it's built/up-to-date.
+        try:
+            env_dir = find_environment_dir(path)
+            if env_dir is not None:
+                # Non-interactive for eval; warn but don't block
+                ensure_built(env_dir, interactive=True)
+        except Exception as e:
+            hud_console.debug(f"Eval preflight env check skipped: {e}")
 
         # Single task - use the first (and only) task
         task = tasks[0]
@@ -200,7 +228,7 @@ async def run_single_task(
     else:
         # Load from HuggingFace dataset or non-file source
         hud_console.info(f"📊 Loading tasks from: {source}…")
-        tasks = load_tasks(source)
+        tasks: list[Task] = load_tasks(source)  # type: ignore[assignment]
 
     if not tasks:
         hud_console.error(f"No tasks found in: {source}")
@@ -248,6 +276,16 @@ async def run_single_task(
         agent_config = {"verbose": verbose}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
+    elif agent_type == "litellm":
+        from hud.agents.lite_llm import LiteAgent
+
+        agent_class = LiteAgent
+        agent_config = {
+            "model_name": model or "gpt-4o-mini",
+            "verbose": verbose,
+        }
+        if allowed_tools:
+            agent_config["allowed_tools"] = allowed_tools
     else:
         from hud.agents import ClaudeAgent
 
@@ -292,7 +330,7 @@ async def run_single_task(
 async def run_full_dataset(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm"] = "claude",
+    agent_type: Literal["claude", "openai", "vllm", "litellm"] = "claude",
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 30,
@@ -322,7 +360,7 @@ async def run_full_dataset(
 
     # Load tasks using unified loader
     hud_console.info(f"📊 Loading tasks from: {source}…")
-    tasks = load_tasks(source)
+    tasks: list[Task] = load_tasks(source)  # type: ignore[assignment]
 
     if not tasks:
         hud_console.error(f"No tasks found in: {source}")
@@ -385,6 +423,25 @@ async def run_full_dataset(
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
 
+    elif agent_type == "litellm":
+        try:
+            from hud.agents.lite_llm import LiteAgent
+
+            agent_class = LiteAgent
+        except ImportError as e:
+            hud_console.error(
+                "LiteLLM agent dependencies are not installed. "
+                "Please install with: pip install 'hud-python[agent]'"
+            )
+            raise typer.Exit(1) from e
+
+        agent_config = {
+            "model_name": model or "gpt-4o-mini",
+            "verbose": verbose,
+        }
+        if allowed_tools:
+            agent_config["allowed_tools"] = allowed_tools
+
     else:
         try:
             from hud.agents import ClaudeAgent
@@ -501,10 +558,10 @@ def eval_command(
         "--full",
         help="Run the entire dataset (omit for single-task debug mode)",
     ),
-    agent: Literal["claude", "openai", "vllm"] = typer.Option(
+    agent: Literal["claude", "openai", "vllm", "litellm"] = typer.Option(
         "claude",
         "--agent",
-        help="Agent backend to use (claude, openai, or vllm for local server)",
+        help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
     ),
     model: str | None = typer.Option(
         None,
@@ -546,6 +603,12 @@ def eval_command(
         "--verbose",
         help="Enable verbose output from the agent",
     ),
+    very_verbose: bool = typer.Option(
+        False,
+        "--very-verbose",
+        "-vv",
+        help="Enable debug-level logs for maximum visibility",
+    ),
     vllm_base_url: str | None = typer.Option(
         None,
         "--vllm-base-url",
@@ -595,6 +658,23 @@ def eval_command(
     """
     from hud.settings import settings
 
+    if very_verbose:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            format="%(asctime)s - %(name)s - %(message)s",
+            datefmt="%H:%M:%S",
+        )
+        logging.getLogger("hud.agents").setLevel(logging.DEBUG)
+        logging.getLogger("hud.agents.base").setLevel(logging.DEBUG)
+    elif verbose:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s - %(name)s - %(message)s",
+            datefmt="%H:%M:%S",
+        )
+        logging.getLogger("hud.agents").setLevel(logging.INFO)
+        logging.getLogger("hud.agents.base").setLevel(logging.INFO)
+
     # Check for required API keys
     if agent == "claude":
         if not settings.anthropic_api_key:
@@ -617,7 +697,7 @@ def eval_command(
     # Check for HUD_API_KEY if using HUD services
     if not settings.api_key:
         hud_console.warning("HUD_API_KEY not set. Some features may be limited.")
-        hud_console.info("Get your API key at: https://app.hud.so")
+        hud_console.info("Get your API key at: https://hud.so")
         hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
 
     # Parse allowed tools
@@ -642,7 +722,7 @@ def eval_command(
             parallel=parallel,
             max_workers=max_workers,
             max_concurrent_per_worker=max_concurrent_per_worker,
-            verbose=verbose,
+            verbose=very_verbose or verbose,
            vllm_base_url=vllm_base_url,
             group_size=group_size,
         )
@@ -655,7 +735,7 @@ def eval_command(
             model=model,
             allowed_tools=allowed_tools_list,
             max_steps=max_steps,
-            verbose=verbose,
+            verbose=very_verbose or verbose,
             vllm_base_url=vllm_base_url,
             group_size=group_size,
         )
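
To close the loop, a hedged sketch of driving the single-task path programmatically with the new backend, using only the parameters visible in this diff (the task file name is a placeholder):

import asyncio

from hud.cli.eval import run_single_task

# Runs one task with the LiteLLM backend; for file sources the
# local-environment preflight (find_environment_dir / ensure_built) fires first.
asyncio.run(
    run_single_task(
        "tasks.json",
        agent_type="litellm",
        model="gpt-4o-mini",
        max_steps=10,
    )
)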