hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (77) hide show
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +81 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +66 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +567 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +347 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/comparator.py +6 -6
  32. hud/native/tests/test_comparator.py +8 -8
  33. hud/native/tests/test_native_init.py +13 -11
  34. hud/otel/config.py +1 -1
  35. hud/otel/instrumentation.py +35 -0
  36. hud/rl/README.md +30 -0
  37. hud/rl/__init__.py +1 -0
  38. hud/rl/actor.py +174 -0
  39. hud/rl/buffer.py +371 -0
  40. hud/rl/chat_template.jinja +101 -0
  41. hud/rl/config.py +184 -0
  42. hud/rl/distributed.py +95 -0
  43. hud/rl/learner.py +589 -0
  44. hud/rl/tests/__init__.py +1 -0
  45. hud/rl/tests/test_learner.py +171 -0
  46. hud/rl/train.py +354 -0
  47. hud/rl/types.py +101 -0
  48. hud/rl/utils/start_vllm_server.sh +30 -0
  49. hud/rl/utils.py +524 -0
  50. hud/rl/vllm_adapter.py +125 -0
  51. hud/settings.py +6 -0
  52. hud/telemetry/__init__.py +2 -1
  53. hud/telemetry/job.py +46 -3
  54. hud/telemetry/tests/test_trace.py +3 -3
  55. hud/telemetry/trace.py +85 -13
  56. hud/tools/tests/test_computer.py +3 -3
  57. hud/tools/tests/test_computer_actions.py +1 -1
  58. hud/types.py +123 -2
  59. hud/utils/group_eval.py +223 -0
  60. hud/utils/hud_console.py +113 -13
  61. hud/utils/tasks.py +119 -0
  62. hud/utils/tests/test_version.py +1 -1
  63. hud/version.py +1 -1
  64. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
  65. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
  66. hud/cli/hf.py +0 -406
  67. hud/cli/rl/README.md +0 -243
  68. hud/cli/rl/init.py +0 -370
  69. hud/cli/rl/pod.py +0 -501
  70. hud/cli/rl/ssh.py +0 -322
  71. hud/cli/rl/train.py +0 -562
  72. hud/cli/rl/utils.py +0 -165
  73. hud/datasets/execution/__init__.py +0 -13
  74. hud/datasets/task.py +0 -116
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
  76. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
  77. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/utils.py DELETED
@@ -1,165 +0,0 @@
1
- """Shared utilities for RL commands."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- from pathlib import Path
7
- from typing import Any
8
-
9
- import yaml
10
-
11
- from hud.utils.hud_console import HUDConsole
12
-
13
- hud_console = HUDConsole()
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
def read_lock_file() -> dict[str, Any]:
    """Read and parse ``hud.lock.yaml`` from the current working directory.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the file
        does not exist, is empty, cannot be read or parsed, or does not hold a
        mapping at its top level. A warning is printed for the unreadable,
        unparsable and non-mapping cases.
    """
    lock_file = Path("hud.lock.yaml")

    try:
        # Open directly instead of checking exists() first: no window between
        # the check and the open. UTF-8 is stated explicitly so decoding does
        # not depend on the platform default encoding.
        with lock_file.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        # A missing lock file is a normal situation, not worth a warning.
        return {}
    except Exception as e:
        # Deliberately broad: reading the lock file is best-effort.
        hud_console.warning(f"Could not read hud.lock.yaml: {e}")
        return {}

    if not data:
        # Empty document (or any other falsy top-level value).
        return {}

    if not isinstance(data, dict):
        # A list or scalar at the top level would break every caller that
        # does `.get(...)` on the result, so refuse it here.
        hud_console.warning(
            "Could not read hud.lock.yaml: "
            f"expected a mapping at the top level, got {type(data).__name__}"
        )
        return {}

    return data
30
-
31
-
32
def write_lock_file(data: dict[str, Any]) -> bool:
    """Dump *data* as YAML into ``hud.lock.yaml`` in the current directory.

    Keys are written in insertion order using block style.

    Returns:
        ``True`` when the file was written, ``False`` (after printing a
        warning) when opening or serializing failed.
    """
    target = Path("hud.lock.yaml")

    try:
        with open(target, "w") as handle:
            yaml.dump(data, handle, default_flow_style=False, sort_keys=False)
    except Exception as exc:
        hud_console.warning(f"Could not write hud.lock.yaml: {exc}")
        return False

    return True
43
-
44
-
45
def get_mcp_config_from_lock() -> dict[str, Any] | None:
    """Build a remote MCP configuration from the lock file's ``image`` entry.

    Returns:
        A config with a single ``"hud"`` server pointing at the hosted MCP
        endpoint and carrying the image in the ``Mcp-Image`` header, or
        ``None`` when the lock file has no (truthy) ``image`` value.
    """
    image_ref = read_lock_file().get("image")
    if not image_ref:
        return None

    # "$HUD_API_KEY" is left as a literal placeholder here.
    request_headers = {"Authorization": "Bearer $HUD_API_KEY", "Mcp-Image": image_ref}
    return {"hud": {"url": "https://mcp.hud.so/v3/mcp", "headers": request_headers}}
60
-
61
-
62
def get_primary_dataset() -> str | None:
    """Get the primary dataset name from the lock file.

    Returns:
        The ``name`` entry of the ``primary_dataset`` mapping, or ``None`` when
        the lock file has no such mapping or the mapping has no ``name``.
    """
    primary = read_lock_file().get("primary_dataset")

    # The key may be present but null (or some other non-mapping value); a
    # `.get(key, {})` default does not cover that and `.get("name")` would
    # then raise AttributeError.
    if not isinstance(primary, dict):
        return None

    return primary.get("name")
66
-
67
-
68
def get_image_from_lock() -> str | None:
    """Return the ``image`` entry of the lock file, or ``None`` if it has none."""
    return read_lock_file().get("image")
72
-
73
-
74
def detect_image_name() -> str | None:
    """Try to detect the image name from several sources, in priority order.

    1. The ``image`` entry of the lock file.
    2. ``[tool.hud] image_name`` in ``pyproject.toml``.
    3. ``[project] name`` in ``pyproject.toml``, suffixed with ``:latest``.
    4. The current directory name, suffixed with ``:latest``.

    Returns:
        The detected image name. The final fallback always produces a value,
        so ``None`` is not returned in practice; the annotation is kept for
        interface compatibility.
    """
    # First check lock file
    image = get_image_from_lock()
    if image:
        return image

    # Check pyproject.toml
    pyproject = Path("pyproject.toml")
    if pyproject.exists():
        try:
            # Imported lazily: tomllib only exists in the stdlib from
            # Python 3.11; on older interpreters the import error is caught
            # below and the directory-name fallback is used.
            import tomllib

            with open(pyproject, "rb") as f:
                data = tomllib.load(f)

            # Check for hud.image_name
            image = data.get("tool", {}).get("hud", {}).get("image_name")
            if image:
                return image

            # Use project name
            name = data.get("project", {}).get("name")
            if name:
                return f"{name}:latest"
        except Exception as e:
            # Best-effort: fall through to the directory name, but report
            # the cause so a missing tomllib, an unreadable file and
            # malformed TOML can be told apart.
            logger.warning("Failed to load pyproject.toml: %s", e)

    # Use directory name as last resort
    return f"{Path.cwd().name}:latest"
104
-
105
-
106
def validate_dataset_name(name: str) -> bool:
    """Validate that *name* has the ``org/dataset`` format.

    Both parts must be non-empty and consist only of ASCII letters, digits,
    dashes and underscores.

    Args:
        name: Candidate dataset name.

    Returns:
        ``True`` when the name is valid. ``False`` otherwise; except for an
        empty name, the reason is reported through ``hud_console``.
    """
    import re

    if not name:
        return False

    if "/" not in name:
        hud_console.error(f"Invalid dataset name: {name}")
        hud_console.info("Dataset name should be in format: org/dataset")
        return False

    parts = name.split("/")
    # Exactly one slash, with something on both sides of it.
    if len(parts) != 2 or not all(parts):
        hud_console.error(f"Invalid dataset name: {name}")
        return False

    # fullmatch rather than match with "^...$": "$" also matches just before
    # a trailing newline, which would let "org/dataset\n" through.
    if not all(re.fullmatch(r"[a-zA-Z0-9_-]+", part) for part in parts):
        hud_console.error(f"Invalid characters in dataset name: {name}")
        hud_console.info("Use only letters, numbers, dashes, and underscores")
        return False

    return True
135
-
136
-
137
def create_tasks_template() -> list[dict[str, Any]]:
    """Return a one-element list holding an example task for ``tasks.json``.

    The ``mcp_config`` of the example is a placeholder: its keys start with
    ``#`` and show a remote and a local configuration to copy from.
    """
    remote_example = {
        "hud": {
            "url": "https://mcp.hud.so/v3/mcp",
            "headers": {
                "Authorization": "Bearer $HUD_API_KEY",
                "Mcp-Image": "your-org/your-env:latest",
            },
        }
    }
    local_example = {
        "local": {"command": "docker", "args": ["run", "--rm", "-i", "your-env:latest"]}
    }

    # Assembled key by key; insertion order is the order written to the file.
    example_task: dict[str, Any] = {}
    example_task["id"] = "example-task-001"
    example_task["prompt"] = "Complete the first TODO item in the list"
    example_task["mcp_config"] = {
        "# TODO": "Add your MCP configuration here",
        "# Example for remote": remote_example,
        "# Example for local": local_example,
    }
    example_task["setup_tool"] = {
        "name": "setup",
        "arguments": {"name": "todo_seed", "num_items": 5},
    }
    example_task["evaluate_tool"] = {
        "name": "evaluate",
        "arguments": {"name": "todo_completed", "expected_count": 1},
    }
    example_task["metadata"] = {"difficulty": "easy", "category": "task_completion"}

    return [example_task]
hud/datasets/execution/__init__.py DELETED
@@ -1,13 +0,0 @@
1
- """Dataset execution module."""
2
-
3
- from __future__ import annotations
4
-
5
- from .parallel import calculate_optimal_workers, run_dataset_parallel, run_dataset_parallel_manual
6
- from .runner import run_dataset
7
-
8
- __all__ = [
9
- "calculate_optimal_workers",
10
- "run_dataset",
11
- "run_dataset_parallel",
12
- "run_dataset_parallel_manual",
13
- ]
hud/datasets/task.py DELETED
@@ -1,116 +0,0 @@
1
- """Task model for HUD datasets."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- import logging
7
- from collections import defaultdict
8
- from string import Template
9
- from typing import Any
10
-
11
- from pydantic import BaseModel, Field, field_validator
12
-
13
- from hud.settings import settings
14
- from hud.types import MCPToolCall
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
def _parse_json_if_str(value: Any) -> Any:
    """Decode *value* with ``json.loads`` if it is a string; otherwise return it unchanged.

    Raises:
        HudConfigError: If *value* is a string that is not valid JSON.
    """
    if not isinstance(value, str):
        return value
    try:
        return json.loads(value)
    except json.JSONDecodeError as e:
        from hud.shared.exceptions import HudConfigError

        raise HudConfigError(f"Invalid JSON string: {e}") from e


def _substitute_placeholders(text: str, mapping: dict[str, Any]) -> str:
    """Expand ``$VAR``, ``${VAR}`` and ``${VAR:default}`` placeholders in *text*.

    - A name found in *mapping* is replaced by ``str(mapping[name])``.
    - A name not found is replaced by its default if one is given, else by "".
    - ``$$`` produces a literal ``$``.
    - Any other ``$`` is left untouched.
    """
    # Local import: `re` is not among this module's file-level imports.
    import re

    placeholder = re.compile(
        r"\$(?:"
        r"(?P<escaped>\$)"
        r"|(?P<named>[_a-zA-Z][_a-zA-Z0-9]*)"
        r"|\{(?P<braced>[_a-zA-Z][_a-zA-Z0-9]*)(?::(?P<default>[^}]*))?\}"
        r")"
    )

    def _resolve(match: Any) -> str:
        if match.group("escaped") is not None:
            return "$"
        name = match.group("named") or match.group("braced")
        if name in mapping:
            return str(mapping[name])
        return match.group("default") or ""

    return placeholder.sub(_resolve, text)


class Task(BaseModel):
    """
    A task configuration that can be used to create a task.

    The mcp_config field supports environment variable substitution using
    template placeholders in the format $VAR_NAME, ${VAR_NAME} or
    ${VAR_NAME:default_value}. Variables that are not set resolve to the
    default when one is given and to an empty string otherwise. Use $$ for
    a literal dollar sign.

    Example:
        mcp_config: {
            "hud": {
                "url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
                "headers": {
                    "Authorization": "Bearer ${HUD_API_KEY}",
                    "Mcp-Image": "your-mcp-image"
                }
            }
        }
    """

    id: str | None = None
    prompt: str
    mcp_config: dict[str, Any]
    setup_tool: MCPToolCall | list[MCPToolCall] | None = None
    evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
    system_prompt: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)

    @field_validator("mcp_config", "metadata", mode="before")
    @classmethod
    def parse_json_strings(cls, v: Any) -> Any:
        """Parse JSON strings into dictionaries.

        Raises:
            HudConfigError: If a string value is not valid JSON.
        """
        return _parse_json_if_str(v)

    @field_validator("setup_tool", "evaluate_tool", mode="before")
    @classmethod
    def convert_dict_to_tool_call(cls, v: Any) -> Any:
        """Convert dict to MCPToolCall instance, parsing JSON strings first.

        A dict becomes one MCPToolCall; in a list, every dict item is
        converted and other items are kept as they are. Any other value is
        returned unchanged.

        Raises:
            HudConfigError: If a string value is not valid JSON.
        """
        if v is None:
            return None

        # Parse JSON string if needed
        v = _parse_json_if_str(v)

        if isinstance(v, dict):
            return MCPToolCall(**v)
        if isinstance(v, list):
            return [MCPToolCall(**item) if isinstance(item, dict) else item for item in v]
        return v

    @field_validator("mcp_config", mode="before")
    @classmethod
    def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
        """
        Automatically resolve environment variables in mcp_config.

        Placeholders are looked up in the process environment, overlaid with
        the fields of ``settings.model_dump()`` and, when an API key is
        configured, ``HUD_API_KEY``. Strings are substituted recursively
        through nested dicts and lists; other values are returned unchanged.

        Missing variables resolve to their ``${VAR:default}`` default, or to
        empty strings when no default is given.
        """
        import os

        # Start with current environment variables
        mapping: dict[str, Any] = dict(os.environ)
        mapping.update(settings.model_dump())

        if settings.api_key:
            mapping["HUD_API_KEY"] = settings.api_key
        else:
            logger.error("HUD_API_KEY is not set, tracing and remote training will not work")

        def substitute_in_value(obj: Any) -> Any:
            """Recursively substitute variables in nested structures."""
            if isinstance(obj, str):
                # string.Template is not used here: it raises ValueError on
                # "${VAR:default}" and on any stray "$" in a value.
                return _substitute_placeholders(obj, mapping)
            if isinstance(obj, dict):
                return {k: substitute_in_value(item) for k, item in obj.items()}
            if isinstance(obj, list):
                return [substitute_in_value(item) for item in obj]
            return obj

        return substitute_in_value(v)