hud-python 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of hud-python might be problematic.
- hud/agents/claude.py +8 -2
- hud/agents/misc/response_agent.py +1 -1
- hud/agents/openai.py +8 -2
- hud/agents/openai_chat_generic.py +160 -26
- hud/cli/rl/__init__.py +11 -2
- hud/cli/rl/pod.py +4 -0
- hud/cli/rl/ssh.py +34 -2
- hud/cli/rl/train.py +190 -51
- hud/datasets/execution/parallel.py +113 -37
- hud/otel/exporters.py +3 -0
- hud/otel/processors.py +3 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/METADATA +1 -1
- {hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/RECORD +18 -18
- {hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/WHEEL +0 -0
- {hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py CHANGED

@@ -85,8 +85,8 @@ class ClaudeAgent(MCPAgent):
         self._claude_to_mcp_tool_map: dict[str, str] = {}
         self.claude_tools: list[dict] = []

-        #
-
+        # Append Claude-specific instructions to the base system prompt
+        claude_instructions = """
 You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.

 When working on tasks:
@@ -99,6 +99,12 @@ class ClaudeAgent(MCPAgent):
 Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
 """.strip()  # noqa: E501

+        # Append Claude instructions to any base system prompt
+        if self.system_prompt:
+            self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
+        else:
+            self.system_prompt = claude_instructions
+
     async def initialize(self, task: str | Task | None = None) -> None:
         """Initialize the agent and build tool mappings."""
         await super().initialize(task)
hud/agents/openai.py CHANGED

@@ -78,8 +78,8 @@ class OperatorAgent(MCPAgent):

         self.model_name = "openai-" + self.model

-        #
-
+        # Append OpenAI-specific instructions to the base system prompt
+        openai_instructions = """
 You are an autonomous computer-using agent. Follow these guidelines:

 1. NEVER ask for confirmation. Complete all tasks autonomously.
@@ -93,6 +93,12 @@ class OperatorAgent(MCPAgent):
 Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
 """.strip()  # noqa: E501

+        # Append OpenAI instructions to any base system prompt
+        if self.system_prompt:
+            self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
+        else:
+            self.system_prompt = openai_instructions
+
     async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
         """
         Run the agent with the given prompt or task.
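Both agents now append their provider-specific instructions to whatever base system prompt the caller configured, rather than overwriting it. A minimal standalone sketch of the pattern; the Agent class below is a hypothetical stand-in, not the hud-python API:

# Hypothetical stand-in illustrating the append-don't-replace pattern above.
PROVIDER_INSTRUCTIONS = "You are an autonomous agent. Never ask for confirmation."

class Agent:
    def __init__(self, system_prompt: str | None = None) -> None:
        self.system_prompt = system_prompt
        # Append provider instructions to any base prompt the caller supplied
        if self.system_prompt:
            self.system_prompt = f"{self.system_prompt}\n\n{PROVIDER_INSTRUCTIONS}"
        else:
            self.system_prompt = PROVIDER_INSTRUCTIONS

print(Agent("Answer in French.").system_prompt)
# Answer in French.
#
# You are an autonomous agent. Never ask for confirmation.
print(Agent().system_prompt)
# You are an autonomous agent. Never ask for confirmation.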
hud/agents/openai_chat_generic.py CHANGED

@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING, Any, cast

 import mcp.types as types

+from hud import instrument
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult

 from .base import MCPAgent
@@ -52,6 +53,7 @@ class GenericOpenAIChatAgent(MCPAgent):
         self.model_name = model_name
         self.parallel_tool_calls = parallel_tool_calls
         self.logprobs = logprobs
+        self.conversation_history = []

     @staticmethod
     def _oai_to_mcp(tool_call: Any) -> MCPToolCall:  # type: ignore[valid-type]
@@ -64,40 +66,114 @@ class GenericOpenAIChatAgent(MCPAgent):

     async def get_system_messages(self) -> list[Any]:
         """Get system messages for OpenAI."""
-        return [
-            {"role": "system", "content": self.system_prompt},
-        ]
+        return [{"role": "system", "content": self.system_prompt}]

     async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
         """Format blocks for OpenAI."""
-
-
-
-
-
-
-
-
-
-
+        content = []
+        for block in blocks:
+            if isinstance(block, types.TextContent):
+                content.append({"type": "text", "text": block.text})
+            elif isinstance(block, types.ImageContent):
+                content.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:{block.mimeType};base64,{block.data}"},
+                    }
+                )
+
+        return [{"role": "user", "content": content}]
+
+    def _sanitize_schema_for_openai(self, schema: dict) -> dict:
+        """Convert MCP JSON Schema to OpenAI-compatible format.
+
+        Handles unsupported features like anyOf and prefixItems.
+        """
+        if not isinstance(schema, dict):
+            return schema
+
+        sanitized = {}
+
+        for key, value in schema.items():
+            if key == "anyOf" and isinstance(value, list):
+                # Handle anyOf patterns (usually for nullable fields)
+                non_null_types = [
+                    v for v in value if not (isinstance(v, dict) and v.get("type") == "null")
+                ]
+                if non_null_types:
+                    # Use the first non-null type
+                    sanitized.update(self._sanitize_schema_for_openai(non_null_types[0]))
+                else:
+                    sanitized["type"] = "string"  # Fallback
+
+            elif key == "prefixItems":
+                # Convert prefixItems to simple items
+                sanitized["type"] = "array"
+                if isinstance(value, list) and value:
+                    # Use the type from the first item as the items schema
+                    first_item = value[0]
+                    if isinstance(first_item, dict):
+                        sanitized["items"] = {"type": first_item.get("type", "string")}
+                    else:
+                        sanitized["items"] = {"type": "string"}
+
+            elif key == "properties" and isinstance(value, dict):
+                # Recursively sanitize property schemas
+                sanitized[key] = {
+                    prop_name: self._sanitize_schema_for_openai(prop_schema)
+                    for prop_name, prop_schema in value.items()
+                }
+
+            elif key == "items" and isinstance(value, dict):
+                # Recursively sanitize items schema
+                sanitized[key] = self._sanitize_schema_for_openai(value)
+
+            elif key in (
+                "type",
+                "description",
+                "enum",
+                "required",
+                "default",
+                "minimum",
+                "maximum",
+                "minItems",
+                "maxItems",
+            ):
+                # These are supported by OpenAI
+                sanitized[key] = value
+
+        return sanitized or {"type": "object"}

     def get_tool_schemas(self) -> list[dict]:
         tool_schemas = super().get_tool_schemas()
         openai_tools = []
         for schema in tool_schemas:
+            parameters = schema.get("parameters", {})
+
+            if parameters:
+                sanitized_params = self._sanitize_schema_for_openai(parameters)
+            else:
+                sanitized_params = {"type": "object", "properties": {}}
+
             openai_tool = {
                 "type": "function",
                 "function": {
                     "name": schema["name"],
                     "description": schema.get("description", ""),
-                    "parameters":
+                    "parameters": sanitized_params,
                 },
             }
             openai_tools.append(openai_tool)
         return openai_tools

+    @instrument(
+        span_type="agent",
+        record_args=False,
+        record_result=True,
+    )
     async def get_response(self, messages: list[Any]) -> AgentResponse:
         """Send chat request to OpenAI and convert the response."""
+
         # Convert MCP tool schemas to OpenAI format
         mcp_schemas = self.get_tool_schemas()

@@ -112,6 +188,19 @@ class GenericOpenAIChatAgent(MCPAgent):
         choice = response.choices[0]
         msg = choice.message

+        assistant_msg: dict[str, Any] = {"role": "assistant"}
+
+        if msg.content:
+            assistant_msg["content"] = msg.content
+
+        if msg.tool_calls:
+            assistant_msg["tool_calls"] = msg.tool_calls
+
+        messages.append(assistant_msg)
+
+        # Store the complete conversation history
+        self.conversation_history = messages.copy()
+
         tool_calls = []
         if msg.tool_calls:
             for tc in msg.tool_calls:
@@ -123,7 +212,7 @@ class GenericOpenAIChatAgent(MCPAgent):
         return AgentResponse(
             content=msg.content or "",
             tool_calls=tool_calls,
-            done=choice.finish_reason
+            done=choice.finish_reason in ("stop", "length"),
             raw=response,  # Include raw response for access to Choice objects
         )

@@ -132,23 +221,68 @@ class GenericOpenAIChatAgent(MCPAgent):
         tool_calls: list[MCPToolCall],
         tool_results: list[MCPToolResult],
     ) -> list[Any]:
-        """Render MCP tool results as OpenAI
+        """Render MCP tool results as OpenAI messages.
+
+        Note: OpenAI tool messages only support string content.
+        When images are present, we return both a tool message and a user message.
+        """
         rendered: list[dict[str, Any]] = []
         for call, res in zip(tool_calls, tool_results, strict=False):
-            if
-
-
-
-
-
-
-
-
+            # Use structuredContent.result if available, otherwise use content
+            items = res.content
+            if res.structuredContent and isinstance(res.structuredContent, dict):
+                items = res.structuredContent.get("result", res.content)
+
+            # Separate text and image content
+            text_parts = []
+            image_parts = []
+
+            for item in items:
+                if isinstance(item, dict):
+                    if item.get("type") == "text":
+                        text_parts.append(item.get("text", ""))
+                    elif item.get("type") == "image":
+                        mime_type = item.get("mimeType", "image/png")
+                        data = item.get("data", "")
+                        image_parts.append(
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{mime_type};base64,{data}"
+                                },
+                            }
+                        )
+                elif isinstance(item, types.TextContent):
+                    text_parts.append(item.text)
+                elif isinstance(item, types.ImageContent):
+                    image_parts.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:{item.mimeType};base64,{item.data}"},
+                        }
+                    )
+
+            text_content = "".join(text_parts) if text_parts else "Tool executed successfully"
             rendered.append(
                 {
                     "role": "tool",
                     "tool_call_id": call.id,
-                    "content":
+                    "content": text_content,
                 }
             )
+
+            # If there are images, add them as a separate user message
+            if image_parts:
+                # Add a user message with the images
+                content_with_images = [
+                    {"type": "text", "text": "Tool returned the following:"},
+                    *image_parts
+                ]
+                rendered.append(
+                    {
+                        "role": "user",
+                        "content": content_with_images,
+                    }
+                )
+
         return rendered
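The new _sanitize_schema_for_openai method collapses nullable anyOf unions to their first non-null branch and flattens tuple-style prefixItems into a plain items schema. A standalone sketch of those two rules, re-implemented here for illustration rather than imported from hud:

# Standalone illustration of the sanitizer's two main rules; not the hud source.
def sanitize(schema: dict) -> dict:
    if "anyOf" in schema:
        # Nullable fields arrive as anyOf[{type: X}, {type: "null"}]; keep X
        non_null = [v for v in schema["anyOf"] if v.get("type") != "null"]
        return sanitize(non_null[0]) if non_null else {"type": "string"}
    if "prefixItems" in schema:
        # Tuple-style arrays become plain arrays typed by their first element
        first = schema["prefixItems"][0]
        return {"type": "array", "items": {"type": first.get("type", "string")}}
    return {k: v for k, v in schema.items() if k in ("type", "description", "enum")}

assert sanitize({"anyOf": [{"type": "integer"}, {"type": "null"}]}) == {"type": "integer"}
assert sanitize({"prefixItems": [{"type": "number"}, {"type": "string"}]}) == {
    "type": "array",
    "items": {"type": "number"},
}

Without this rewrite, OpenAI's function-calling endpoint can reject MCP tool schemas that use these JSON Schema features.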
hud/cli/rl/__init__.py CHANGED

@@ -23,7 +23,10 @@ def rl_main(
     ctx: typer.Context,
     model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
     dataset: str | None = typer.Option(
-        None,
+        None,
+        "--dataset",
+        "-d",
+        help="Dataset: JSON file path or HuggingFace name (auto-detects if not provided)",
     ),
     config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),  # noqa: B008
     gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
@@ -39,9 +42,15 @@ def rl_main(
     3. Push environment to registry if needed
     4. Start remote training on Prime Intellect

+    Dataset can be:
+    - A local JSON file with tasks (e.g., tasks.json)
+    - A HuggingFace dataset name (e.g., 'username/dataset-name')
+    - Auto-detected from current directory if not specified
+
     Examples:
-        hud rl                      # Interactive mode
+        hud rl                      # Interactive mode, auto-detect tasks.json
         hud rl --model gpt2         # Train with specific model
+        hud rl --dataset tasks.json # Use local task file
         hud rl --gpus 4xH100        # Use different GPU configuration
         hud rl init my-env:latest   # Generate config for environment
     """
hud/cli/rl/pod.py CHANGED

@@ -62,6 +62,7 @@ async def create_and_connect_prime_pod(
     image: str,
     team_id: str | None = None,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Create a Prime Intellect pod and connect to it for training."""
     design.section_title("🌐 Creating Prime Intellect Pod")
@@ -330,6 +331,7 @@ async def create_and_connect_prime_pod(
             output_dir=output_dir,
             image=image,
             dataset_size=dataset_size,
+            is_json_file=is_json_file,
         )
     else:
         # Manual fallback
@@ -457,6 +459,7 @@ async def run_prime_training(
     auto_create_pod: str | None = None,
     team_id: str | None = None,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Run training on Prime Intellect infrastructure."""
     # Check API key
@@ -488,4 +491,5 @@ async def run_prime_training(
         image=image,
         team_id=team_id,
         dataset_size=dataset_size,
+        is_json_file=is_json_file,
     )
hud/cli/rl/ssh.py CHANGED

@@ -101,6 +101,7 @@ async def connect_and_train(
     output_dir: Path,
     image: str,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Connect to the pod via SSH and run training commands."""
     design.section_title("🚀 Starting Remote Training")
@@ -175,6 +176,37 @@ async def connect_and_train(
         design.info("Make sure scp is installed and in your PATH")
         raise typer.Exit(1) from e

+    # If dataset is a JSON file, copy it too
+    remote_dataset = dataset  # Default to unchanged
+    if is_json_file:
+        design.info("Copying task file to pod...")
+        try:
+            # On Windows, we need to ensure proper path formatting
+            dataset_path = str(dataset).replace("\\", "/")
+            # Extract just the filename for the remote path
+            dataset_filename = os.path.basename(dataset)
+            remote_dataset = f"/root/{dataset_filename}"
+
+            scp_cmd = [
+                "scp",
+                "-i",
+                str(ssh_key_path),
+                "-P",
+                ssh_port,
+                "-o",
+                "StrictHostKeyChecking=no",
+                "-o",
+                "UserKnownHostsFile=/dev/null",
+                dataset_path,
+                f"{ssh_user_host}:{remote_dataset}",
+            ]
+            design.debug(f"Running: {' '.join(scp_cmd)}")
+            subprocess.run(scp_cmd, check=True)  # noqa: S603, ASYNC221
+            design.success(f"Task file copied to {remote_dataset}")
+        except subprocess.CalledProcessError as e:
+            design.error(f"Failed to copy task file: {e}")
+            raise typer.Exit(1) from e
+
     design.info("Setting up environment and starting training...")
     design.info("This will take a few minutes for initial setup, then training will begin.")
     design.info("")
@@ -196,7 +228,7 @@ async def connect_and_train(
         "# Load environment",
         "env = vf.load_environment(",
         '    env_id="hud-vf-gym",',
-        f'    taskset="{
+        f'    taskset="{remote_dataset}",',
         '    config_path="/root/config.yaml",',
         f"    num_tasks={dataset_size},",
         ")",
@@ -242,7 +274,7 @@ async def connect_and_train(
         "uv venv --python 3.12 && "
         "source .venv/bin/activate && "
         # Install packages
-        "prime env install hud/hud-vf-gym@0.1.
+        "prime env install hud/hud-vf-gym@0.1.1 && "
         "uv pip install 'verifiers[train]' && "
         "uv pip install flash-attn --no-build-isolation && "
         # Set environment variables
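The task-file copy builds the scp command as an argument list, which sidesteps shell quoting, and normalizes Windows backslashes first. A standalone sketch with hypothetical connection values:

import os
import subprocess  # noqa: S404  # used by the optional run below

# Hypothetical values; in the diff these come from the pod connection details.
ssh_key_path = os.path.expanduser("~/.ssh/id_ed25519")
ssh_port = "2222"
ssh_user_host = "root@203.0.113.7"
dataset = r"C:\work\tasks.json"  # Windows-style path, as the comment above anticipates

dataset_path = dataset.replace("\\", "/")  # scp expects forward slashes
remote_dataset = f"/root/{os.path.basename(dataset_path)}"

# List form avoids shell quoting issues on every platform
scp_cmd = ["scp", "-i", ssh_key_path, "-P", ssh_port, dataset_path,
           f"{ssh_user_host}:{remote_dataset}"]
print(" ".join(scp_cmd))
# subprocess.run(scp_cmd, check=True)  # uncomment to actually copy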
hud/cli/rl/train.py CHANGED

@@ -23,6 +23,40 @@ from .utils import (
 design = HUDDesign()


+def find_task_json_files() -> list[Path]:
+    """Find JSON files containing tasks in the current directory."""
+    json_files = []
+    patterns = [
+        "*task*.json",
+        "*eval*.json",
+        "*Task*.json",
+        "*Eval*.json",
+        "*TASK*.json",
+        "*EVAL*.json",
+        "tasks.json",  # Most common name
+    ]
+
+    # First check current directory
+    for pattern in patterns:
+        json_files.extend(Path(".").glob(pattern))
+
+    # If no files found, search one level deep
+    if not json_files:
+        for pattern in patterns:
+            json_files.extend(Path(".").glob(f"*/{pattern}"))
+
+    # Remove duplicates and sort, prioritizing "tasks.json"
+    json_files = sorted(set(json_files))
+
+    # Put tasks.json first if it exists
+    tasks_json = Path("tasks.json")
+    if tasks_json in json_files:
+        json_files.remove(tasks_json)
+        json_files.insert(0, tasks_json)
+
+    return json_files
+
+
 def train_command_wrapper(
     model: str,
     dataset: str | None,
@@ -128,45 +162,22 @@ def train_command_wrapper(
         raise typer.Exit(1)

     if "dataset" in missing:
-
-
-
-
-
-
+        if missing["dataset"] == "multiple_json":
+            # Multiple JSON files found, let user choose
+            json_files = find_task_json_files()
+            design.info("Multiple task files found:")
+            file_choice = design.select(
+                "Select a task file to use:",
+                choices=[str(f) for f in json_files],
+            )
+            dataset = file_choice
+            design.success(f"Selected: {dataset}")
+        elif missing["dataset"] == "none":
+            design.error("No dataset specified and no task JSON files found")
+            design.info("Please use --dataset or create a tasks.json file")
+            design.hint(
+                "Example: hud hf --name my-org/my-tasks  # Generate tasks from HUD evaluation"
             )
-
-        if create_dataset == "Yes, upload to HuggingFace":
-            dataset_name = typer.prompt("Enter dataset name (e.g., username/dataset-name)")
-
-            if not validate_dataset_name(dataset_name):
-                design.error("Invalid dataset name format. Expected: username/dataset-name")
-                raise typer.Exit(1)
-
-            design.info(f"Running 'hud hf tasks.json --name {dataset_name}'...")
-            design.info("")
-
-            # Run hf command
-            result = subprocess.run(  # noqa: S603
-                ["hud", "hf", "tasks.json", "--name", dataset_name],  # noqa: S607
-                capture_output=True,
-                text=True,
-            )
-
-            if result.returncode == 0:
-                design.success("Dataset uploaded successfully")
-                dataset = dataset_name
-            else:
-                design.error("Failed to upload dataset")
-                if result.stderr:
-                    design.error(result.stderr)
-                raise typer.Exit(1)
-        else:
-            design.info("Please specify a dataset with --dataset")
-            raise typer.Exit(1)
-    else:
-        design.error("No dataset specified and no tasks.json found")
-        design.info("Use --dataset to specify a HuggingFace dataset")
             raise typer.Exit(1)

     # Ask about pod creation for Prime training
@@ -247,9 +258,123 @@ async def train_command(
         design.hint("Run 'hud build' first or specify with 'hud rl init <image>'")
         raise typer.Exit(1)

-    #
+    # Handle dataset (JSON file or HuggingFace dataset)
     dataset_size = None
-
+    is_json_file = False
+
+    # Use dataset from command or look for JSON files
+    if not dataset:
+        # Check for JSON files if no dataset specified
+        json_files = find_task_json_files()
+        if json_files:
+            if len(json_files) == 1:
+                dataset = str(json_files[0])
+                design.info(f"Found task file: {dataset}")
+                is_json_file = True
+            else:
+                # This case should have been handled in train_command_wrapper
+                design.error("Multiple task files found but none selected")
+                raise typer.Exit(1)
+        else:
+            # Use dataset from lock file
+            dataset = get_primary_dataset()
+            if dataset:
+                design.info(f"Using dataset from lock file: {dataset}")
+
+    # Check if dataset is a file path
+    if dataset and Path(dataset).exists() and dataset.endswith(".json"):
+        is_json_file = True
+
+    # Validate dataset
+    if dataset and is_json_file:
+        # Load and validate JSON file
+        design.info(f"Validating task file: {dataset}")
+        try:
+            with open(dataset) as f:  # noqa: ASYNC230
+                tasks_data = json.load(f)
+
+            # Handle both single task and array of tasks
+            if isinstance(tasks_data, dict):
+                tasks = [tasks_data]
+            elif isinstance(tasks_data, list):
+                tasks = tasks_data
+            else:
+                design.error("Invalid tasks file format")
+                raise typer.Exit(1)
+
+            dataset_size = len(tasks)
+            if dataset_size < 4:
+                design.error(f"Task file has only {dataset_size} tasks")
+                design.info("RL training requires at least 4 tasks for proper batching")
+                design.hint("Consider adding more tasks to your JSON file")
+                raise typer.Exit(1)
+
+            design.success(f"✓ Task file has {dataset_size} tasks")
+
+            # Check and convert MCP configs to remote if needed
+            if tasks:
+                sample_task = tasks[0]
+                sample_mcp_config = sample_task.get("mcp_config", {})
+
+                # Check if using local MCP configs
+                config_type = "unknown"
+                for server_config in sample_mcp_config.values():
+                    if isinstance(server_config, dict) and "url" in server_config:
+                        url = server_config.get("url", "")
+                        if "mcp.hud.so" in url:
+                            config_type = "remote"
+                            break
+                    else:
+                        config_type = "local"
+
+                if config_type == "local":
+                    design.info("Converting local MCP configs to remote for training...")
+
+                    # Get the image name from lock file or environment
+                    from .utils import get_image_from_lock
+
+                    env_image = image or get_image_from_lock()
+
+                    if not env_image:
+                        design.error("No image found for remote MCP conversion")
+                        design.hint("Run 'hud build' first")
+                        raise typer.Exit(1)
+
+                    # Check if image needs to be pushed
+                    if "/" not in env_image or env_image.startswith("local/"):
+                        design.warning(f"Image '{env_image}' appears to be local only")
+                        design.info("Running 'hud push' to make it publicly available...")
+                        from hud.cli.push import push_command
+
+                        push_command(directory=".", yes=True)
+                        design.success("Image pushed successfully")
+                        # Re-read image name after push
+                        env_image = get_image_from_lock()
+
+                    # Convert all tasks to use remote MCP
+                    for task in tasks:
+                        remote_config = {
+                            "hud": {
+                                "url": "https://mcp.hud.so/v3/mcp",
+                                "headers": {
+                                    "Authorization": "Bearer $HUD_API_KEY",
+                                    "Mcp-Image": env_image,
+                                },
+                            }
+                        }
+                        task["mcp_config"] = remote_config
+
+                    design.success("✓ Converted all tasks to use remote MCP configs")
+
+                    # Save the modified tasks back to the file
+                    with open(dataset, "w") as f:  # noqa: ASYNC230
+                        json.dump(tasks, f, indent=2)
+                    design.info("Updated task file with remote configs")
+        except json.JSONDecodeError as e:
+            design.error(f"Invalid JSON in task file: {e}")
+            raise typer.Exit(1) from e
+    elif dataset:
+        # Validate HuggingFace dataset
         design.info(f"Validating dataset: {dataset}")
         try:
             # Try to load dataset info from HuggingFace
@@ -273,12 +398,6 @@ async def train_command(
             design.warning(f"Could not validate dataset size: {e}")
             design.info("Proceeding with training - ensure dataset has at least 4 tasks")

-    # Use dataset from command or lock file
-    if not dataset:
-        dataset = get_primary_dataset()
-        if dataset:
-            design.info(f"Using dataset from lock file: {dataset}")
-
     # Display configuration
     design.section_title("📋 Training Configuration")
     design.json_config(
@@ -318,6 +437,7 @@ async def train_command(
         auto_create_pod=auto_create_pod,
         team_id=team_id,
         dataset_size=dataset_size,
+        is_json_file=is_json_file,
     )


@@ -340,10 +460,19 @@ def check_requirements(config: Path | None, dataset: str | None) -> dict[str, An

     # Check dataset
     if not dataset:
-        #
-
-        if
-
+        # First check for JSON files (preferred method)
+        json_files = find_task_json_files()
+        if json_files:
+            if len(json_files) == 1:
+                # Will be auto-selected
+                pass
+            else:
+                missing["dataset"] = "multiple_json"
+        else:
+            # Check lock file for HuggingFace dataset
+            primary_dataset = get_primary_dataset()
+            if not primary_dataset:
+                missing["dataset"] = "none"

     return missing

@@ -407,13 +536,23 @@ async def run_remote_training(
     auto_create_pod: str | None = None,
     team_id: str | None = None,
     dataset_size: int | None = None,
+    is_json_file: bool = False,
 ) -> None:
     """Run training on remote infrastructure."""
     design.section_title("🚀 Remote Training")

     if provider == "prime":
         await run_prime_training(
-            model,
+            model,
+            dataset,
+            config,
+            gpus,
+            output_dir,
+            image,
+            auto_create_pod,
+            team_id,
+            dataset_size,
+            is_json_file,
         )
     else:
         design.error(f"Provider '{provider}' not yet supported")
hud/datasets/execution/parallel.py CHANGED

@@ -40,6 +40,7 @@ def _process_worker(
     2. Creates its own event loop
     3. Processes a batch of tasks asynchronously
     4. Returns results with their original indices
+    5. Handles interruption signals gracefully

     Args:
         task_batch: List of (index, task_dict) tuples
@@ -58,6 +59,7 @@ def _process_worker(
         List of (index, result) tuples
     """
     # Import inside worker to avoid pickling issues
+    import signal
     import sys

     import hud
@@ -72,6 +74,14 @@ def _process_worker(
     except AttributeError:
         pass

+    # Set up signal handler for clean interruption
+    def signal_handler(signum: int, frame: Any) -> None:
+        logger.warning("Worker %s: Received interrupt signal", worker_id)
+        # Raise KeyboardInterrupt to actually interrupt the worker
+        raise KeyboardInterrupt(f"Worker {worker_id} interrupted by user")
+
+    signal.signal(signal.SIGINT, signal_handler)
+
     # Reinitialize telemetry in this process
     configure_telemetry()

@@ -157,8 +167,25 @@ def _process_worker(
         # Process all tasks in parallel within this process
         tasks = [process_single_task(idx, task_dict) for idx, task_dict in task_batch]

-
-
+        try:
+            results = await asyncio.gather(*tasks, return_exceptions=False)
+            return results
+        except asyncio.CancelledError:
+            logger.info("Worker %s: Tasks cancelled due to interruption", worker_id)
+            # Return error results for all tasks
+            return [
+                (
+                    idx,
+                    {
+                        "error": "Task cancelled (Ctrl+C)",
+                        "isError": True,
+                        "reward": 0.0,
+                        "done": False,
+                        "content": "Task cancelled",
+                    },
+                )
+                for idx, _ in task_batch
+            ]

     try:
         # Run the async batch processing
@@ -180,6 +207,24 @@ def _process_worker(
             logger.warning("Worker %s: Telemetry flush timed out", worker_id)

         return results
+    except KeyboardInterrupt:
+        logger.info("Worker %s: Interrupted by user, stopping gracefully", worker_id)
+        # Return partial results for tasks that completed
+        partial_results = []
+        for idx, _ in task_batch:
+            partial_results.append(
+                (
+                    idx,
+                    {
+                        "error": "Worker interrupted by user (Ctrl+C)",
+                        "isError": True,
+                        "reward": 0.0,
+                        "done": False,
+                        "content": "Task interrupted",
+                    },
+                )
+            )
+        return partial_results
     except Exception as e:
         logger.error("[Worker %s] Batch processing failed: %s", worker_id, e)
         logger.error("Worker %s batch processing failed: %s", worker_id, e)
@@ -365,7 +410,8 @@ async def run_dataset_parallel_manual(
     )

     # Process batches in parallel using ProcessPoolExecutor
-
+    executor = ProcessPoolExecutor(max_workers=max_workers)
+    try:
         # Submit all batches to workers
         future_to_batch = {
             executor.submit(worker_func, batch, worker_id=i): batch
@@ -377,48 +423,78 @@ async def run_dataset_parallel_manual(
         total = len(task_dicts)

         # Process results as they complete
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            for future in as_completed(future_to_batch):
+                batch = future_to_batch[future]
+
+                try:
+                    # Get results from this worker
+                    batch_results = future.result()
+
+                    # Place results in correct positions
+                    for index, result in batch_results:
+                        results[index] = result
+                        completed += 1
+
+                    # Calculate success rate so far
+                    successful_so_far = sum(
+                        1
+                        for r in results[:completed]
+                        if r is not None and getattr(r, "reward", 0) > 0
+                    )

-
-
-
-
-
-
+                    progress_msg = (
+                        f"Progress: {completed}/{total} tasks completed "
+                        f"({100 * completed / total:.1f}%) | "
+                        f"Success rate: {successful_so_far}/{completed} "
+                        f"({100 * successful_so_far / completed:.1f}%)"
+                    )

-
+                    logger.info(progress_msg)

-
-
-
+                except Exception as e:
+                    # Handle worker failure
+                    logger.error(
+                        "Worker failed with exception: %s\n%s", e, traceback.format_exc()
+                    )

-
-
-
-
+                    # Mark all tasks in this batch as failed
+                    for index, _ in batch:
+                        results[index] = {
+                            "error": f"Worker process failed: {e}",
+                            "isError": True,
+                            "reward": 0.0,
+                            "done": False,
+                            "content": f"Worker process failed: {e}",
+                        }
+                        completed += 1
+
+        except KeyboardInterrupt:
+            logger.warning("\n⚠️ Parallel evaluation interrupted by user (Ctrl+C)")
+            logger.info("Cancelling pending tasks...")
+
+            # Cancel all pending futures
+            for future in future_to_batch:
+                if not future.done():
+                    future.cancel()
+
+            # Mark uncompleted tasks as interrupted
+            for i, r in enumerate(results):
+                if r is None:
+                    results[i] = {
+                        "error": "Evaluation interrupted by user",
                         "isError": True,
                         "reward": 0.0,
                         "done": False,
-                        "content":
+                        "content": "Task interrupted (Ctrl+C)",
                     }
-
+
+            logger.info("Interrupted after %s/%s tasks", completed, total)
+            raise  # Re-raise to propagate the interrupt
+
+    finally:
+        # Always shutdown the executor properly
+        executor.shutdown(wait=False, cancel_futures=True)

     # Verify all results are populated
     missing = [i for i, r in enumerate(results) if r is None]
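The interruption handling combines a SIGINT handler inside each worker with executor.shutdown(wait=False, cancel_futures=True) in a finally block. A minimal self-contained sketch of the same pattern, independent of hud:

import signal
import time
from concurrent.futures import ProcessPoolExecutor, as_completed


def worker(batch_id: int) -> int:
    """Stand-in for a batch of tasks; sleeps instead of doing real work."""
    # Ensure SIGINT raises KeyboardInterrupt inside the worker process
    signal.signal(signal.SIGINT, signal.default_int_handler)
    time.sleep(1)
    return batch_id * 2


if __name__ == "__main__":
    executor = ProcessPoolExecutor(max_workers=4)
    futures = {}
    try:
        futures = {executor.submit(worker, i): i for i in range(8)}
        for future in as_completed(futures):
            print(f"batch {futures[future]} -> {future.result()}")
    except KeyboardInterrupt:
        # Cancel anything not started, then propagate, as the diff does
        for future in futures:
            future.cancel()
        raise
    finally:
        # cancel_futures requires Python 3.9+
        executor.shutdown(wait=False, cancel_futures=True)

Creating the executor outside a with-block, as the diff now does, lets the finally clause shut it down without waiting, so a Ctrl+C returns promptly instead of blocking on in-flight batches.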
hud/otel/exporters.py CHANGED

@@ -14,6 +14,7 @@ from __future__ import annotations

 import contextlib
 import json
 import logging
+import time
 from collections import defaultdict
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING, Any
@@ -362,5 +363,7 @@ class HudSpanExporter(SpanExporter):
         pass

     def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
+        if timeout_millis:
+            time.sleep(timeout_millis / 1000)
         # Synchronous export, nothing buffered here
         return True
hud/otel/processors.py CHANGED

@@ -1,6 +1,7 @@
 from __future__ import annotations

 import logging
+import time
 from typing import Any

 from opentelemetry import baggage
@@ -115,4 +116,6 @@ class HudEnrichmentProcessor(SpanProcessor):
         pass

     def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
+        if timeout_millis:
+            time.sleep(timeout_millis / 1000)
     return True
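With this change, a caller that passes timeout_millis to force_flush now blocks for the full timeout before getting True back. A small sketch of the observable effect; the class below is a simplified stand-in, not the hud exporter:

import time

class FlushLike:
    # Simplified stand-in mirroring the force_flush change above
    def force_flush(self, timeout_millis: int | None = None) -> bool:
        if timeout_millis:
            time.sleep(timeout_millis / 1000)
        return True

start = time.monotonic()
FlushLike().force_flush(timeout_millis=500)
print(f"returned after ~{time.monotonic() - start:.1f}s")  # ~0.5s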
hud/utils/tests/test_version.py CHANGED

hud/version.py CHANGED

{hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/RECORD CHANGED

@@ -2,15 +2,15 @@ hud/__init__.py,sha256=BjAhZtsHbGN371Q8t3o4v4jltedkmDE85xW0yOILU9g,397
 hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
 hud/settings.py,sha256=q9aZiHjvbL4oLE-N8AttTW4rmzS8zPMnsca-iMGyEGc,2362
 hud/types.py,sha256=gNnyS1G7aYHIR5sT3k3bOfSTFnPylUO6lNGLWbjbeYk,5149
-hud/version.py,sha256=
+hud/version.py,sha256=8Ag1N-qzwxUt5QwVLTJ5Z43L6M6O6FLpCKva6zONOfc,105
 hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
 hud/agents/base.py,sha256=rbwYP_a6XTwhY_5CaBlE7SWflnTq1EOuDiNY2XeUWdM,28275
-hud/agents/claude.py,sha256=
+hud/agents/claude.py,sha256=_eD_XKZhVJ6grkHQfbS6JskztueomQcmJeGJMbfNdmE,14534
 hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
-hud/agents/openai.py,sha256=
-hud/agents/openai_chat_generic.py,sha256=
+hud/agents/openai.py,sha256=tvFYsZ5yaoLkfjMnHe-COxRttMsLRXBLPdSqgeipQRk,14257
+hud/agents/openai_chat_generic.py,sha256=Q6eKlKQIF2o04eGpIcBAyqpdcgRvuolbxmgWTT6ktEQ,10478
 hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
-hud/agents/misc/response_agent.py,sha256=
+hud/agents/misc/response_agent.py,sha256=pnaomb4H-QJm1YKU3tC1YnZXxOlDbTHIXaIH-6Nkb6I,3102
 hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
 hud/agents/tests/test_base.py,sha256=F39ajSqASGUbPyPoWSY9KARFav62qNTK74W11Tr1Tg4,28970
 hud/agents/tests/test_claude.py,sha256=wqEKlzEvx8obz1sSm4NY0j-Zyt1qWNfDOmRqYIuAEd0,13069
@@ -31,11 +31,11 @@ hud/cli/pull.py,sha256=JHwCwUwRO0Nzbgm9mkjsz6EpxbxgwQVhgNSY64nNZ-s,11969
 hud/cli/push.py,sha256=4KrEHj0_i3xJNCB3eRjANmHFhSW4MFfpnld3nfVYENs,17904
 hud/cli/remove.py,sha256=USAvB6pbMA3jd19xUtLEBiMsklVTEfE2Maw9nYcpSAE,6619
 hud/cli/rl/README.md,sha256=3pqRZMrnwD-lJwWGCCNZNhGdZG6zyydLBOer0e8BkLw,5983
-hud/cli/rl/__init__.py,sha256=
+hud/cli/rl/__init__.py,sha256=g_Crqn5o0m9xANrTOkQZENWVlwHAV6MWiobte-FfqiY,3412
 hud/cli/rl/init.py,sha256=GXVOXLrX8CVAgpJ1pHuk6Y6oujbh46Rtz8kG18jGzk8,13789
-hud/cli/rl/pod.py,sha256=
-hud/cli/rl/ssh.py,sha256=
-hud/cli/rl/train.py,sha256=
+hud/cli/rl/pod.py,sha256=ZiXI-RG9YsnKx1EWzufcqklBdaD_d6XFtD45a0H8KpM,18837
+hud/cli/rl/ssh.py,sha256=bHAieonseJPON7P1mwB2GPWKLDlLZuvQniONmr5ZfcE,11523
+hud/cli/rl/train.py,sha256=sjY4J0TCp8647kzuIHyEeIsFVGtE0tllT0GzhkPPrWY,19895
 hud/cli/rl/utils.py,sha256=ZW3sjl5KaHZaOCjAbut_QIpQvxgzlxjPGuM6fuYkU9I,4836
 hud/cli/tests/__init__.py,sha256=ZrGVkmH7DHXGqOvjOSNGZeMYaFIRB2K8c6hwr8FPJ-8,68
 hud/cli/tests/test_analyze.py,sha256=SwxvRlnw-VaEwKN2nd1FJAxfhieujPjh7PdQh_LYJ5E,11050
@@ -79,7 +79,7 @@ hud/datasets/__init__.py,sha256=74T4mrjELKtE04XkZKwU8QAJcg2wjqXLqRO9s4GlPr4,678
 hud/datasets/task.py,sha256=V82HzRb2_c2MO9EG5ZcY-PMsLt3234Uks7WlkMta5HY,3615
 hud/datasets/utils.py,sha256=3hKvZTkZuCRkTeITB86nNdA1dtHZAqFfAdSPMtcTUhs,4275
 hud/datasets/execution/__init__.py,sha256=4m1AEpMQaUSJFVN_iAXvY6zFttVgZKwE6oQtC0Rrk7U,330
-hud/datasets/execution/parallel.py,sha256=
+hud/datasets/execution/parallel.py,sha256=4aL1XpS3vOBqZjgs0vrMZJ4eAoi86Td8C-m5SUtVxMs,25231
 hud/datasets/execution/runner.py,sha256=EEvb90vvAqFXXx8NyVKLfK5p-gtsfJqiFJAoqSjyfXg,4695
 hud/misc/__init__.py,sha256=m_pprQQ-G-Y0Sd0NEiR8MtAMbElnuFZ2OWT8TXrw7c4,43
 hud/misc/claude_plays_pokemon.py,sha256=IthAkjDVr2Q-GNvX-QLJyMzN7-0pHqqJbagGNv2m7yo,10453
@@ -87,9 +87,9 @@ hud/otel/__init__.py,sha256=ii17ayoWiS5vAhA7UAmZ8TkmP52gs2pWyHsD46-uYbE,1003
 hud/otel/collector.py,sha256=jLZymZ8r7xt2VDuWexfbnT7PY1-0aiyLMgjBy8KDY1M,4497
 hud/otel/config.py,sha256=6np_C2UXhtKHHjY41HQxZElua2Eh_EUCBiRB_YuiSuc,6249
 hud/otel/context.py,sha256=C9MvO99cRSNNDEDC7ehO3eoTPnb6J7AemUYvEp57yEU,17774
-hud/otel/exporters.py,sha256=
+hud/otel/exporters.py,sha256=RLAjWa8b2DJEU21740Idq4fmeIuabLEqGGUspcFDcH4,14331
 hud/otel/instrumentation.py,sha256=xTjrkn2p490lJ8UlSD1SfzkPZsD8XKDocQqYQfwMMKo,3775
-hud/otel/processors.py,sha256
+hud/otel/processors.py,sha256=-gGRbwifplcExDQBLfx_9tqWreDImULJNcENgO9q7VU,4700
 hud/otel/tests/__init__.py,sha256=VNJKBMaxTtbn7trW-1Ph50zCvCok_wTSGcI1HD6GOLA,43
 hud/otel/tests/test_processors.py,sha256=np0R4ssd9j6LJSJykJ5bNjl0POwNYNhgb7BqOZHwcMY,6778
 hud/server/__init__.py,sha256=8LUwgsXO8xiViWP7uImDwcOsWLu01r5F4r8U8qH3rSY,91
@@ -157,10 +157,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
 hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
 hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
 hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
-hud/utils/tests/test_version.py,sha256=
+hud/utils/tests/test_version.py,sha256=Ur5o4UVJbPy4rYJUIc3yBCTK-mk9CAf_7bHv2qSPJEI,160
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.4.
-hud_python-0.4.
-hud_python-0.4.
-hud_python-0.4.
-hud_python-0.4.
+hud_python-0.4.18.dist-info/METADATA,sha256=vvUR4EBJmH6WqrLg2OxsupIJLs_6S8aVPaCRJjN3sJI,20287
+hud_python-0.4.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hud_python-0.4.18.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
+hud_python-0.4.18.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
+hud_python-0.4.18.dist-info/RECORD,,
{hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/WHEEL
File without changes

{hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/entry_points.txt
File without changes

{hud_python-0.4.16.dist-info → hud_python-0.4.18.dist-info}/licenses/LICENSE
File without changes