hud-python 0.4.36__py3-none-any.whl → 0.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/__init__.py +2 -0
- hud/agents/lite_llm.py +72 -0
- hud/agents/openai_chat_generic.py +21 -7
- hud/cli/__init__.py +19 -4
- hud/cli/build.py +17 -2
- hud/cli/dev.py +1 -1
- hud/cli/eval.py +93 -13
- hud/cli/flows/tasks.py +197 -65
- hud/cli/push.py +9 -0
- hud/cli/rl/__init__.py +14 -4
- hud/cli/rl/celebrate.py +187 -0
- hud/cli/rl/config.py +15 -8
- hud/cli/rl/local_runner.py +44 -20
- hud/cli/rl/remote_runner.py +163 -86
- hud/cli/rl/viewer.py +141 -0
- hud/cli/rl/wait_utils.py +89 -0
- hud/cli/utils/env_check.py +196 -0
- hud/cli/utils/source_hash.py +108 -0
- hud/clients/base.py +1 -1
- hud/clients/fastmcp.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/context.py +2 -2
- hud/rl/vllm_adapter.py +1 -1
- hud/server/server.py +84 -13
- hud/server/tests/test_add_tool.py +60 -0
- hud/server/tests/test_context.py +128 -0
- hud/server/tests/test_mcp_server_handlers.py +44 -0
- hud/server/tests/test_mcp_server_integration.py +405 -0
- hud/server/tests/test_mcp_server_more.py +247 -0
- hud/server/tests/test_run_wrapper.py +53 -0
- hud/server/tests/test_server_extra.py +166 -0
- hud/server/tests/test_sigterm_runner.py +78 -0
- hud/shared/hints.py +1 -1
- hud/telemetry/job.py +2 -2
- hud/types.py +9 -2
- hud/utils/tasks.py +32 -24
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/METADATA +14 -12
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/RECORD +43 -29
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/WHEEL +0 -0
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/local_runner.py
CHANGED
|
@@ -30,6 +30,7 @@ def run_local_training(
|
|
|
30
30
|
model: str | None,
|
|
31
31
|
config_file: Path | None,
|
|
32
32
|
output_dir: str,
|
|
33
|
+
yes: bool,
|
|
33
34
|
restart: bool,
|
|
34
35
|
verbose: bool,
|
|
35
36
|
no_ddp: bool,
|
|
@@ -63,8 +64,11 @@ def run_local_training(
|
|
|
63
64
|
try:
|
|
64
65
|
import typer
|
|
65
66
|
|
|
66
|
-
if not
|
|
67
|
-
|
|
67
|
+
if not yes:
|
|
68
|
+
if not typer.confirm("\nDo you want to continue anyway?", default=False):
|
|
69
|
+
raise typer.Exit(1)
|
|
70
|
+
else:
|
|
71
|
+
hud_console.warning("Auto-continuing despite Python 3.13+ (--yes mode)")
|
|
68
72
|
except Exception as e:
|
|
69
73
|
hud_console.warning(f"Failed to confirm: {e}")
|
|
70
74
|
return
|
|
@@ -113,7 +117,13 @@ def run_local_training(
|
|
|
113
117
|
try:
|
|
114
118
|
import typer
|
|
115
119
|
|
|
116
|
-
|
|
120
|
+
if yes:
|
|
121
|
+
continue_training = True
|
|
122
|
+
hud_console.info("Auto-continuing with healthy GPUs only (--yes mode)")
|
|
123
|
+
else:
|
|
124
|
+
continue_training = typer.confirm(
|
|
125
|
+
"\nContinue with healthy GPUs only?", default=True
|
|
126
|
+
)
|
|
117
127
|
except Exception:
|
|
118
128
|
continue_training = True
|
|
119
129
|
|
|
@@ -200,21 +210,25 @@ def run_local_training(
|
|
|
200
210
|
|
|
201
211
|
# Step 3: Model selection (if not provided)
|
|
202
212
|
if model is None and not config_file:
|
|
203
|
-
|
|
204
|
-
"
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
213
|
+
if yes:
|
|
214
|
+
model = "Qwen/Qwen2.5-VL-3B-Instruct" # Default model in yes mode
|
|
215
|
+
hud_console.info(f"Auto-selecting model: {model} (--yes mode)")
|
|
216
|
+
else:
|
|
217
|
+
model = hud_console.select(
|
|
218
|
+
"Select a model for RL training:",
|
|
219
|
+
choices=[
|
|
220
|
+
{
|
|
221
|
+
"name": "Qwen 2.5 VL 3B (Recommended - Vision-Language)",
|
|
222
|
+
"value": "Qwen/Qwen2.5-VL-3B-Instruct",
|
|
223
|
+
},
|
|
224
|
+
{"name": "Custom model", "value": "custom"},
|
|
225
|
+
],
|
|
226
|
+
default=0,
|
|
227
|
+
)
|
|
214
228
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
229
|
+
if model == "custom":
|
|
230
|
+
console.print("Enter the model name (HuggingFace ID):")
|
|
231
|
+
model = input().strip()
|
|
218
232
|
|
|
219
233
|
# Validate model is a VL model (whether provided via CLI or selected)
|
|
220
234
|
if model:
|
|
@@ -277,6 +291,7 @@ def run_local_training(
|
|
|
277
291
|
config, estimated_memory = generate_config_interactive(
|
|
278
292
|
model_name=model,
|
|
279
293
|
presets=presets,
|
|
294
|
+
yes=yes,
|
|
280
295
|
)
|
|
281
296
|
|
|
282
297
|
# Step 5: Save temporary config and display summary
|
|
@@ -288,8 +303,8 @@ def run_local_training(
|
|
|
288
303
|
# Display configuration summary
|
|
289
304
|
display_config_summary(config, len(tasks), gpu_info, estimated_memory)
|
|
290
305
|
|
|
291
|
-
# Step 6: Ask for confirmation (skip if config was provided)
|
|
292
|
-
if not config_file:
|
|
306
|
+
# Step 6: Ask for confirmation (skip if config was provided or in yes mode)
|
|
307
|
+
if not config_file and not yes:
|
|
293
308
|
console.print("\n[bold yellow]Options:[/bold yellow]")
|
|
294
309
|
console.print(" • Type [green]'start'[/green] to begin training")
|
|
295
310
|
console.print(" • Type [cyan]'edit'[/cyan] to open config in your editor")
|
|
@@ -346,7 +361,12 @@ def run_local_training(
|
|
|
346
361
|
try:
|
|
347
362
|
import typer
|
|
348
363
|
|
|
349
|
-
if
|
|
364
|
+
if yes:
|
|
365
|
+
# Always save in yes mode
|
|
366
|
+
config_path = Path("rl_config.json")
|
|
367
|
+
save_config(config, config_path)
|
|
368
|
+
hud_console.info("Auto-saved configuration (--yes mode)")
|
|
369
|
+
elif typer.confirm("Save this configuration for later?", default=True):
|
|
350
370
|
config_path = Path("rl_config.json")
|
|
351
371
|
save_config(config, config_path)
|
|
352
372
|
except Exception as e:
|
|
@@ -367,6 +387,10 @@ def run_local_training(
|
|
|
367
387
|
console.print(
|
|
368
388
|
"[red]Invalid choice. Type 'start', 'edit', or 'cancel':[/red] ", end=""
|
|
369
389
|
)
|
|
390
|
+
elif yes:
|
|
391
|
+
# In yes mode, auto-start training
|
|
392
|
+
hud_console.info("Auto-starting training (--yes mode)")
|
|
393
|
+
config = load_config(temp_config_path)
|
|
370
394
|
else:
|
|
371
395
|
console.print("\n[dim]Using provided configuration file...[/dim]")
|
|
372
396
|
config = load_config(temp_config_path)
|
hud/cli/rl/remote_runner.py
CHANGED
|
@@ -6,14 +6,15 @@ This module implements the new interactive flow for RL training.
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
-
import os
|
|
10
|
-
import subprocess
|
|
11
9
|
import time
|
|
12
10
|
import uuid
|
|
13
11
|
from pathlib import Path
|
|
14
12
|
|
|
15
13
|
from rich.console import Console
|
|
16
14
|
|
|
15
|
+
from hud.cli.rl.celebrate import show_confetti_async
|
|
16
|
+
from hud.cli.rl.viewer import show_json_interactive
|
|
17
|
+
from hud.cli.rl.wait_utils import wait_for_enter_cancel_or_change
|
|
17
18
|
from hud.utils.hud_console import hud_console
|
|
18
19
|
from hud.utils.tasks import load_tasks
|
|
19
20
|
|
|
@@ -57,7 +58,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
|
|
|
57
58
|
hud_console.error("Timeout waiting for vLLM deployment")
|
|
58
59
|
raise ValueError("vLLM deployment timeout")
|
|
59
60
|
info = rl_api.get_model(model_name)
|
|
60
|
-
if info.
|
|
61
|
+
if info.status == "ready":
|
|
61
62
|
hud_console.success(
|
|
62
63
|
f"vLLM server ready at http://rl.hud.so/v1/models/{model_name}/vllm"
|
|
63
64
|
)
|
|
@@ -70,6 +71,7 @@ def run_remote_training(
|
|
|
70
71
|
model: str | None,
|
|
71
72
|
config_file: Path | None,
|
|
72
73
|
output_dir: str,
|
|
74
|
+
yes: bool = False,
|
|
73
75
|
) -> None:
|
|
74
76
|
"""Run RL training remotely via the API server following the new interactive flow."""
|
|
75
77
|
from hud.settings import settings
|
|
@@ -81,39 +83,59 @@ def run_remote_training(
|
|
|
81
83
|
)
|
|
82
84
|
raise ValueError("API key not found")
|
|
83
85
|
|
|
84
|
-
# Step 1: CONFIRMATION - Load tasks
|
|
86
|
+
# Step 1: CONFIRMATION - Load tasks
|
|
85
87
|
if tasks_file:
|
|
86
|
-
tasks = load_tasks(tasks_file)
|
|
88
|
+
tasks: list[Task] = load_tasks(tasks_file) # type: ignore[assignment]
|
|
89
|
+
# Resolve tasks immediately after loading (validate + fill defaults)
|
|
90
|
+
from hud.types import Task
|
|
91
|
+
|
|
92
|
+
resolved_tasks: list[dict] = []
|
|
93
|
+
for t in tasks:
|
|
94
|
+
try:
|
|
95
|
+
resolved = Task(**t.model_dump()).model_dump()
|
|
96
|
+
except Exception:
|
|
97
|
+
resolved = t.model_dump()
|
|
98
|
+
resolved_tasks.append(resolved)
|
|
99
|
+
|
|
100
|
+
# Preview resolved task
|
|
101
|
+
if resolved_tasks and not yes:
|
|
102
|
+
try:
|
|
103
|
+
show_json_interactive(resolved_tasks[0], title="Task Preview")
|
|
104
|
+
except Exception as e:
|
|
105
|
+
hud_console.warning(f"Interactive viewer failed: {e}")
|
|
87
106
|
else:
|
|
88
107
|
raise ValueError("Tasks file not found")
|
|
89
108
|
|
|
90
109
|
# Show example task for confirmation
|
|
91
|
-
hud_console.section_title("Example Task from Dataset")
|
|
92
|
-
|
|
93
|
-
if tasks:
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
110
|
+
# hud_console.section_title("Example Task from Dataset")
|
|
111
|
+
|
|
112
|
+
# if tasks:
|
|
113
|
+
# # Display task with truncated values
|
|
114
|
+
# try:
|
|
115
|
+
# task_data = resolved_tasks[0]
|
|
116
|
+
# except Exception:
|
|
117
|
+
# task_data = tasks[0].model_dump()
|
|
118
|
+
# truncated_data = {}
|
|
119
|
+
# max_value_length = 120 # Maximum characters to show per line
|
|
120
|
+
|
|
121
|
+
# for key, value in task_data.items():
|
|
122
|
+
# value_str = str(value)
|
|
123
|
+
# if len(value_str) > max_value_length:
|
|
124
|
+
# truncated_data[key] = value_str[:max_value_length] + "..."
|
|
125
|
+
# else:
|
|
126
|
+
# truncated_data[key] = value_str
|
|
127
|
+
|
|
128
|
+
# hud_console.key_value_table(truncated_data)
|
|
129
|
+
|
|
130
|
+
# if not hud_console.confirm("Proceed with training on this dataset?", default=True):
|
|
131
|
+
# hud_console.error("Training cancelled")
|
|
132
|
+
# return
|
|
111
133
|
|
|
112
134
|
# Step 2: MODEL SELECTION
|
|
113
135
|
hud_console.section_title("Model Selection")
|
|
114
136
|
|
|
115
137
|
# Fetch existing models
|
|
116
|
-
hud_console.info("Fetching your models from https://
|
|
138
|
+
hud_console.info("Fetching your models from https://hud.so/models")
|
|
117
139
|
|
|
118
140
|
try:
|
|
119
141
|
models = rl_api.list_models()
|
|
@@ -137,7 +159,11 @@ def run_remote_training(
|
|
|
137
159
|
choices.append({"name": "Create new model", "value": "__new__"})
|
|
138
160
|
|
|
139
161
|
if not model:
|
|
140
|
-
if
|
|
162
|
+
if yes:
|
|
163
|
+
# In yes mode, always create a new model to avoid conflicts
|
|
164
|
+
selected = "__new__"
|
|
165
|
+
hud_console.info("Auto-creating new model (--yes mode)")
|
|
166
|
+
elif choices:
|
|
141
167
|
selected = hud_console.select("Select a model:", choices=choices)
|
|
142
168
|
else:
|
|
143
169
|
selected = "__new__"
|
|
@@ -155,14 +181,18 @@ def run_remote_training(
|
|
|
155
181
|
hud_console.info("Creating new model...")
|
|
156
182
|
|
|
157
183
|
# Ask for model type
|
|
158
|
-
|
|
159
|
-
"
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
184
|
+
if yes:
|
|
185
|
+
model_type = "Qwen/Qwen2.5-VL-3B-Instruct" # Default model in yes mode
|
|
186
|
+
hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
|
|
187
|
+
else:
|
|
188
|
+
model_type = hud_console.select(
|
|
189
|
+
"Select base model type:",
|
|
190
|
+
choices=[
|
|
191
|
+
{"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
|
|
192
|
+
# {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"}, # noqa: E501
|
|
193
|
+
],
|
|
194
|
+
default=0,
|
|
195
|
+
)
|
|
166
196
|
from rich.prompt import Prompt
|
|
167
197
|
|
|
168
198
|
# Ask for model name
|
|
@@ -174,9 +204,13 @@ def run_remote_training(
|
|
|
174
204
|
default_name = f"{base_default}-{suffix}"
|
|
175
205
|
suffix += 1
|
|
176
206
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
207
|
+
if yes:
|
|
208
|
+
model_name = default_name
|
|
209
|
+
hud_console.info(f"Auto-using model name: {model_name} (--yes mode)")
|
|
210
|
+
else:
|
|
211
|
+
hud_console.info(f"Enter model name (default: {default_name}):")
|
|
212
|
+
model_name = Prompt.ask("Model name", default=default_name)
|
|
213
|
+
model_name = model_name.replace("/", "-").lower()
|
|
180
214
|
|
|
181
215
|
# Create the model with retry on name conflict
|
|
182
216
|
hud_console.info(f"Creating model: {model_name}")
|
|
@@ -203,7 +237,11 @@ def run_remote_training(
|
|
|
203
237
|
try:
|
|
204
238
|
from rich.prompt import Prompt as _Prompt
|
|
205
239
|
|
|
206
|
-
|
|
240
|
+
if yes:
|
|
241
|
+
chosen = alt_name
|
|
242
|
+
hud_console.info(f"Auto-using suggested name: {chosen} (--yes mode)")
|
|
243
|
+
else:
|
|
244
|
+
chosen = _Prompt.ask("Use different name", default=alt_name)
|
|
207
245
|
chosen = chosen.replace("/", "-").lower()
|
|
208
246
|
rl_api.create_model(chosen, model_type)
|
|
209
247
|
hud_console.success(f"Created model: {chosen}")
|
|
@@ -223,7 +261,11 @@ def run_remote_training(
|
|
|
223
261
|
|
|
224
262
|
# Check if model is in training
|
|
225
263
|
if model_info.status == "training":
|
|
226
|
-
if
|
|
264
|
+
if yes:
|
|
265
|
+
# In yes mode, skip training if model is already training
|
|
266
|
+
hud_console.warning(f"{model_name} is already training, skipping (--yes mode)")
|
|
267
|
+
return
|
|
268
|
+
elif hud_console.confirm(
|
|
227
269
|
f"{model_name} is currently training. Stop current training?", default=False
|
|
228
270
|
):
|
|
229
271
|
hud_console.info(f"Stopping training for {model_name}...")
|
|
@@ -266,25 +308,33 @@ def run_remote_training(
|
|
|
266
308
|
|
|
267
309
|
# console.print(gpu_table)
|
|
268
310
|
|
|
269
|
-
|
|
270
|
-
"
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
311
|
+
if yes:
|
|
312
|
+
gpu_choice = "A100" # Default GPU in yes mode
|
|
313
|
+
hud_console.info(f"Auto-selecting GPU: {gpu_choice} 80GB (--yes mode)")
|
|
314
|
+
else:
|
|
315
|
+
gpu_choice = hud_console.select(
|
|
316
|
+
"Select GPU type:",
|
|
317
|
+
choices=[
|
|
318
|
+
{"name": "A100 80GB", "value": "A100"},
|
|
319
|
+
{"name": "H100 80GB", "value": "H100"},
|
|
320
|
+
],
|
|
321
|
+
default=0,
|
|
322
|
+
)
|
|
277
323
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
324
|
+
if yes:
|
|
325
|
+
num_gpus = 1 # Default to 1 GPU in yes mode
|
|
326
|
+
hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
|
|
327
|
+
else:
|
|
328
|
+
num_gpus = hud_console.select(
|
|
329
|
+
"Number of GPUs:",
|
|
330
|
+
choices=[
|
|
331
|
+
{"name": "1 GPU", "value": 1},
|
|
332
|
+
{"name": "2 GPUs", "value": 2},
|
|
333
|
+
{"name": "4 GPUs", "value": 4},
|
|
334
|
+
{"name": "8 GPUs", "value": 8},
|
|
335
|
+
],
|
|
336
|
+
default=1,
|
|
337
|
+
)
|
|
288
338
|
|
|
289
339
|
# Generate config with presets
|
|
290
340
|
hud_console.info("Generating training configuration...")
|
|
@@ -294,6 +344,7 @@ def run_remote_training(
|
|
|
294
344
|
config, _ = generate_config_interactive(
|
|
295
345
|
model_name=model_info.base_model,
|
|
296
346
|
presets=presets,
|
|
347
|
+
yes=yes,
|
|
297
348
|
)
|
|
298
349
|
|
|
299
350
|
# Use a short label for tasks (avoid full absolute paths)
|
|
@@ -308,39 +359,61 @@ def run_remote_training(
|
|
|
308
359
|
|
|
309
360
|
config.job_name = f"RL {model_name} on {tasks_label}"
|
|
310
361
|
|
|
311
|
-
# Save config
|
|
362
|
+
# Save config so user can review/edit externally
|
|
312
363
|
temp_config_path = Path(f".rl_config_temp_{model_name}.json")
|
|
313
364
|
save_config(config, temp_config_path)
|
|
314
365
|
|
|
315
|
-
#
|
|
366
|
+
# Interactive review loop: show preview, allow external edits, press Enter to start
|
|
316
367
|
hud_console.info(
|
|
317
368
|
f"Using training configuration from [underline cyan]{temp_config_path.absolute()}[/underline cyan]" # noqa: E501
|
|
318
369
|
)
|
|
319
|
-
edit_choice = hud_console.select(
|
|
320
|
-
"Would you like to start training?",
|
|
321
|
-
choices=[
|
|
322
|
-
{"name": "🚀 Start training!", "value": "start"},
|
|
323
|
-
{"name": "✏️ Review configuration", "value": "edit"},
|
|
324
|
-
{"name": "❌ Cancel", "value": "cancel"},
|
|
325
|
-
],
|
|
326
|
-
)
|
|
327
|
-
|
|
328
|
-
if edit_choice == "cancel":
|
|
329
|
-
hud_console.error("Training cancelled")
|
|
330
|
-
return
|
|
331
|
-
elif edit_choice == "edit":
|
|
332
|
-
# Open editor
|
|
333
|
-
editor = os.environ.get("EDITOR", "nano")
|
|
334
|
-
hud_console.info(f"Opening {editor} to edit configuration...")
|
|
335
370
|
|
|
371
|
+
if yes:
|
|
372
|
+
# In yes mode, skip the interactive review loop
|
|
373
|
+
hud_console.info("Auto-accepting config (--yes mode)")
|
|
374
|
+
# Still show the config briefly
|
|
336
375
|
try:
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
376
|
+
show_json_interactive(
|
|
377
|
+
config.to_dict() if hasattr(config, "to_dict") else {},
|
|
378
|
+
title="RL Config Preview",
|
|
379
|
+
prompt=False,
|
|
380
|
+
)
|
|
341
381
|
except Exception as e:
|
|
342
|
-
hud_console.
|
|
343
|
-
|
|
382
|
+
hud_console.warning(f"Interactive viewer failed: {e}")
|
|
383
|
+
else:
|
|
384
|
+
while True:
|
|
385
|
+
# Reload latest config from file each cycle
|
|
386
|
+
try:
|
|
387
|
+
config = load_config(temp_config_path)
|
|
388
|
+
except Exception as e:
|
|
389
|
+
hud_console.warning(f"Failed to load config from disk, using in-memory: {e}")
|
|
390
|
+
|
|
391
|
+
# Preview current config (no extra prompt here; main loop handles start/cancel)
|
|
392
|
+
try:
|
|
393
|
+
show_json_interactive(
|
|
394
|
+
config.to_dict() if hasattr(config, "to_dict") else {},
|
|
395
|
+
title="RL Config Preview",
|
|
396
|
+
prompt=False,
|
|
397
|
+
)
|
|
398
|
+
except Exception as e:
|
|
399
|
+
hud_console.warning(f"Interactive viewer failed: {e}")
|
|
400
|
+
|
|
401
|
+
console.print(
|
|
402
|
+
"\n[dim]Edit the config file above if needed, then save.[/dim]\n"
|
|
403
|
+
"[bold]Press Enter to start training[/bold], or press 'q' to cancel."
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
start_training, cancelled, changed = wait_for_enter_cancel_or_change(
|
|
407
|
+
temp_config_path
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
if cancelled:
|
|
411
|
+
hud_console.error("Training cancelled")
|
|
412
|
+
return
|
|
413
|
+
if start_training:
|
|
414
|
+
break # proceed
|
|
415
|
+
if changed:
|
|
416
|
+
hud_console.info("Detected configuration changes. Reloading preview...")
|
|
344
417
|
|
|
345
418
|
config_dict = config.to_dict()
|
|
346
419
|
else:
|
|
@@ -353,17 +426,21 @@ def run_remote_training(
|
|
|
353
426
|
|
|
354
427
|
# Launch training
|
|
355
428
|
try:
|
|
429
|
+
# Little celebration before launching
|
|
430
|
+
try:
|
|
431
|
+
show_confetti_async(console)
|
|
432
|
+
except Exception:
|
|
433
|
+
hud_console.info("Launching training...")
|
|
434
|
+
|
|
356
435
|
rl_api.launch_training(
|
|
357
436
|
model_name=model_name,
|
|
358
437
|
config=config_dict,
|
|
359
|
-
tasks=
|
|
438
|
+
tasks=resolved_tasks,
|
|
360
439
|
gpu_type=gpu_choice,
|
|
361
440
|
gpu_count=int(num_gpus),
|
|
362
441
|
)
|
|
363
442
|
|
|
364
|
-
hud_console.
|
|
365
|
-
|
|
366
|
-
hud_console.info(f"See your model {model_name} training on https://app.hud.so/models")
|
|
443
|
+
hud_console.info(f"Your model {model_name} has started training")
|
|
367
444
|
hud_console.hint("Launch another training run via: hud rl <tasks_file>")
|
|
368
445
|
hud_console.hint("Or evaluate the model via: hud eval <tasks_file>")
|
|
369
446
|
|
hud/cli/rl/viewer.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Inline JSON preview with expandable view for RL flow.
|
|
2
|
+
|
|
3
|
+
Uses minimal terminal interaction for inline display.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from blessed import Terminal
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.json import JSON as RichJSON
|
|
14
|
+
from rich.panel import Panel
|
|
15
|
+
from rich.table import Table
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _mask_secrets(value: Any) -> Any:
|
|
19
|
+
"""Recursively mask common secret-looking values."""
|
|
20
|
+
secret_keys = {"authorization", "api-key", "apikey", "token", "secret", "password"}
|
|
21
|
+
|
|
22
|
+
def _is_secret_key(k: str) -> bool:
|
|
23
|
+
lowered = k.lower()
|
|
24
|
+
if lowered in secret_keys:
|
|
25
|
+
return True
|
|
26
|
+
return any(s in lowered for s in ["api", "key", "token", "secret", "password"])
|
|
27
|
+
|
|
28
|
+
if isinstance(value, dict):
|
|
29
|
+
result: dict[str, Any] = {}
|
|
30
|
+
for k, v in value.items():
|
|
31
|
+
if _is_secret_key(str(k)) and isinstance(v, str) and v:
|
|
32
|
+
prefix = v[:4]
|
|
33
|
+
suffix = v[-4:] if len(v) > 8 else ""
|
|
34
|
+
result[k] = f"{prefix}…{suffix}"
|
|
35
|
+
else:
|
|
36
|
+
result[k] = _mask_secrets(v)
|
|
37
|
+
return result
|
|
38
|
+
if isinstance(value, list):
|
|
39
|
+
return [_mask_secrets(v) for v in value]
|
|
40
|
+
return value
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _truncate_value(value: Any, max_len: int = 60) -> str:
|
|
44
|
+
"""Truncate a value for preview display."""
|
|
45
|
+
if isinstance(value, str):
|
|
46
|
+
if len(value) > max_len:
|
|
47
|
+
return value[:max_len] + "…"
|
|
48
|
+
return value
|
|
49
|
+
elif isinstance(value, (dict, list)):
|
|
50
|
+
s = json.dumps(value, separators=(",", ":"))
|
|
51
|
+
if len(s) > max_len:
|
|
52
|
+
return s[:max_len] + "…"
|
|
53
|
+
return s
|
|
54
|
+
else:
|
|
55
|
+
return str(value)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _print_expanded_json(console: Console, data: Any, title: str | None) -> None:
    """Print the full (already masked) data as pretty JSON under an optional rule."""
    if title:
        console.rule(f"[bold cyan]{title} (expanded)[/bold cyan]")
    try:
        console.print(RichJSON.from_data(data))
    except Exception:
        # Rich could not render the data; fall back to plain JSON text and
        # stringify anything the encoder does not know.
        console.print(json.dumps(data, indent=2, default=str))


def show_json_interactive(
    data: Any,
    *,
    title: str | None = None,
    max_string_len: int = 60,
    prompt: bool = True,
    initial_expanded: bool = False,
) -> None:
    """Display JSON inline with keyboard-based expand/collapse.

    Secret-looking values are masked via ``_mask_secrets`` before anything is
    printed. By default a compact preview (first five entries of a dict, each
    value shortened via ``_truncate_value``) is shown inside a panel.

    Args:
        data: JSON-like data to display.
        title: Optional heading printed above the preview.
        max_string_len: Maximum length of each previewed value.
        prompt: When True, wait for a key press ('e' expands the full JSON,
            anything else continues). When False, print and return.
        initial_expanded: When True, print the full JSON instead of the
            preview panel.
    """
    # Local import: only this function needs it, and the module's import
    # block stays untouched.
    from rich.markup import escape

    console = Console()
    safe_data = _mask_secrets(data)

    if title:
        console.print(f"\n[bold cyan]{title}[/bold cyan]")

    if initial_expanded:
        _print_expanded_json(console, safe_data, title)
    else:
        table = Table(show_header=False, box=None, padding=(0, 1))
        table.add_column("Key", style="cyan", no_wrap=True)
        table.add_column("Value", style="green")

        if isinstance(safe_data, dict):
            items = list(safe_data.items())
            for key, value in items[:5]:
                # Keys may be non-strings, and both keys and values may
                # contain "[...]" sequences; escape them so Rich prints them
                # literally instead of parsing them as markup.
                table.add_row(
                    escape(f"{key}:"),
                    escape(_truncate_value(value, max_string_len)),
                )
            if len(items) > 5:
                table.add_row("", f"[dim]... and {len(items) - 5} more items[/dim]")
        else:
            table.add_row("", escape(_truncate_value(safe_data, max_string_len)))

        console.print(Panel(table, expand=False, border_style="dim"))

    if not prompt:
        console.print()
        return

    # Prompt for expansion (interactive mode)
    console.print("[dim]Press 'e' to expand, Enter to continue[/dim] ", end="")

    try:
        term = Terminal()
        with term.cbreak():
            key = term.inkey(timeout=30)  # 30 second timeout
            if key and key.lower() == "e":
                console.print()  # New line
                _print_expanded_json(console, safe_data, title)
                console.print("\n[dim]Press Enter to continue...[/dim]")
                term.inkey()
    except Exception:
        # Raw key reading is unavailable; fall back to line-based input.
        console.print()  # Ensure we're on a new line
        try:
            choice = input().strip().lower()
        except EOFError:
            # stdin is closed (non-interactive run): behave like plain Enter.
            choice = ""

        if choice == "e":
            _print_expanded_json(console, safe_data, title)
            console.print("\n[dim]Press Enter to continue...[/dim]")
            try:
                input()
            except EOFError:
                pass

    console.print()
|