hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (77) hide show
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +81 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +66 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +567 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +347 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/comparator.py +6 -6
  32. hud/native/tests/test_comparator.py +8 -8
  33. hud/native/tests/test_native_init.py +13 -11
  34. hud/otel/config.py +1 -1
  35. hud/otel/instrumentation.py +35 -0
  36. hud/rl/README.md +30 -0
  37. hud/rl/__init__.py +1 -0
  38. hud/rl/actor.py +174 -0
  39. hud/rl/buffer.py +371 -0
  40. hud/rl/chat_template.jinja +101 -0
  41. hud/rl/config.py +184 -0
  42. hud/rl/distributed.py +95 -0
  43. hud/rl/learner.py +589 -0
  44. hud/rl/tests/__init__.py +1 -0
  45. hud/rl/tests/test_learner.py +171 -0
  46. hud/rl/train.py +354 -0
  47. hud/rl/types.py +101 -0
  48. hud/rl/utils/start_vllm_server.sh +30 -0
  49. hud/rl/utils.py +524 -0
  50. hud/rl/vllm_adapter.py +125 -0
  51. hud/settings.py +6 -0
  52. hud/telemetry/__init__.py +2 -1
  53. hud/telemetry/job.py +46 -3
  54. hud/telemetry/tests/test_trace.py +3 -3
  55. hud/telemetry/trace.py +85 -13
  56. hud/tools/tests/test_computer.py +3 -3
  57. hud/tools/tests/test_computer_actions.py +1 -1
  58. hud/types.py +123 -2
  59. hud/utils/group_eval.py +223 -0
  60. hud/utils/hud_console.py +113 -13
  61. hud/utils/tasks.py +119 -0
  62. hud/utils/tests/test_version.py +1 -1
  63. hud/version.py +1 -1
  64. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
  65. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
  66. hud/cli/hf.py +0 -406
  67. hud/cli/rl/README.md +0 -243
  68. hud/cli/rl/init.py +0 -370
  69. hud/cli/rl/pod.py +0 -501
  70. hud/cli/rl/ssh.py +0 -322
  71. hud/cli/rl/train.py +0 -562
  72. hud/cli/rl/utils.py +0 -165
  73. hud/datasets/execution/__init__.py +0 -13
  74. hud/datasets/task.py +0 -116
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
  76. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
  77. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/pod.py DELETED
@@ -1,501 +0,0 @@
1
- """Pod creation and management utilities for Prime Intellect."""
2
-
3
- from __future__ import annotations
4
-
5
- import random
6
- import re
7
- import string
8
- import subprocess
9
- import time
10
- from pathlib import Path # noqa: TC003
11
-
12
- import typer
13
- from rich.console import Console
14
- from rich.live import Live
15
- from rich.spinner import Spinner
16
-
17
- from hud.settings import settings
18
- from hud.utils.hud_console import HUDConsole
19
-
20
- from .ssh import check_and_configure_ssh_key, connect_and_train
21
-
22
- hud_console = HUDConsole()
23
-
24
-
25
def parse_gpu_config(gpus: str) -> tuple[int, str]:
    """Parse a GPU configuration string like '2xA100' into count and type.

    A leading ``<count>x`` prefix is optional; without it the count is 1.
    Known short GPU names are translated through a fixed lookup table; any
    other name is returned unchanged.

    Args:
        gpus: GPU specification, e.g. ``"2xA100"``, ``"H100"`` or ``"rtx4090"``.

    Returns:
        A ``(count, gpu_type)`` tuple.

    Raises:
        typer.Exit: If the count prefix is malformed or is not a positive
            integer.
    """
    # Default to 1 GPU of the given type if no count prefix is present.
    count = 1
    gpu_type = gpus

    prefix, separator, remainder = gpus.partition("x")
    if separator:
        try:
            count = int(prefix)
        except ValueError as e:
            # A purely alphabetic prefix means the "x" is part of the GPU name
            # itself (e.g. "rtx4090") rather than a "<count>x" separator, so
            # the whole string is kept as the type. Anything else (empty,
            # "2.5", "a1", ...) is a malformed count.
            if not prefix.strip().isalpha():
                hud_console.error(f"Invalid GPU count: {prefix}")
                raise typer.Exit(1) from e
        else:
            gpu_type = remainder

    if count < 1:
        hud_console.error(f"Invalid GPU count: {count}")
        raise typer.Exit(1)

    # Map common GPU names to Prime's expected format
    gpu_type_map = {
        "A100": "A100_80GB",
        "A10": "A10_24GB",
        "H100": "H100_80GB",
        "V100": "V100_32GB",
        "RTX3090": "RTX_3090",
        "RTX4090": "RTX_4090",
    }

    return count, gpu_type_map.get(gpu_type, gpu_type)
52
-
53
-
54
async def create_and_connect_prime_pod(
    pod_name: str,
    gpu_type: str,
    gpu_count: int,
    model: str,
    dataset: str,
    config: Path,
    output_dir: Path,
    image: str,
    team_id: str | None = None,
    dataset_size: int | None = None,
    is_json_file: bool = False,
) -> None:
    """Create a Prime Intellect pod and connect to it for training.

    The interactive ``prime pods create`` command is driven by piping a
    scripted sequence of answers (provider, disk, image, team, confirmation)
    to its stdin. Once the pod is created, its status is polled until an SSH
    endpoint is reported; training is then started automatically when an SSH
    key is configured, otherwise manual instructions are printed.

    Args:
        pod_name: Name passed to ``prime pods create --name``.
        gpu_type: GPU type passed to ``--gpu-type``.
        gpu_count: Number of GPUs passed to ``--gpu-count``.
        model: Model identifier forwarded to the training step.
        dataset: Dataset identifier forwarded to the training step.
        config: Config path forwarded to ``connect_and_train``.
        output_dir: Output directory forwarded to the training step.
        image: Environment name used for ``prime env install``.
        team_id: Explicit team ID; when given, the "custom team" prompt
            option is selected and this ID is sent.
        dataset_size: Forwarded to ``connect_and_train``.
        is_json_file: Forwarded to ``connect_and_train``.

    Raises:
        typer.Exit: If pod creation fails, the pod ID cannot be extracted
            from the command output, or the pod never becomes active.
    """
    hud_console.section_title("🌐 Creating Prime Intellect Pod")

    create_cmd = [
        "prime",
        "pods",
        "create",
        "--gpu-type",
        gpu_type,
        "--gpu-count",
        str(gpu_count),
        "--name",
        pod_name,
    ]

    hud_console.info(f"Creating pod: {pod_name}")
    hud_console.info(f"GPU configuration: {gpu_count}x {gpu_type}")

    # Check for global team config first
    has_global_team = False
    if not team_id:  # Only check if not explicitly provided
        team_check = subprocess.run(  # noqa: ASYNC221
            ["prime", "config", "view"],  # noqa: S607
            capture_output=True,
            text=True,
        )
        if team_check.returncode == 0:
            for line in team_check.stdout.split("\n"):
                # Look for "Team ID" in the table (case insensitive)
                if "team id" not in line.lower():
                    continue
                # NOTE(review): this reads the cell right after the first "|"
                # as the value, whereas poll_pod_status treats parts[1] as the
                # key and parts[2] as the value and also accepts "│".
                # Confirm against the real `prime config view` output.
                parts = line.split("|")
                if len(parts) >= 2:
                    value = parts[1].strip()
                    if value and value != "None":
                        # team_id itself is left untouched: it is reserved
                        # for explicit user input.
                        has_global_team = True
                        break

    # Display automated selections
    hud_console.info("")
    hud_console.info("Automated selections:")
    hud_console.info(" Provider: Will select from supported providers")
    hud_console.info(" Disk: Default size")
    hud_console.info(" Image: cuda_12_4_pytorch_2_5")
    if team_id:
        hud_console.info(f" Team: {team_id}")
    elif has_global_team:
        hud_console.info(" Team: Using pre-configured team")
    else:
        hud_console.info(" Team: Personal Account")
    hud_console.info("")

    # First, get the provider list by running the command with minimal input
    hud_console.info("Checking available providers...")

    # Run command with just a newline to see provider list
    provider_check = subprocess.run(  # noqa: S603, ASYNC221
        create_cmd,
        input="\n",
        text=True,
        capture_output=True,
    )

    # Parse provider list
    provider_lines = []
    provider_map = {}  # Maps provider name to its menu number
    if provider_check.stdout:
        for line in provider_check.stdout.strip().split("\n"):
            # Look for lines like "1. datacrunch (spot) ($0.65/hr)"
            if ". " in line and ("$" in line or "/hr" in line):
                parts = line.strip().split(". ", 1)
                if len(parts) == 2:
                    num = parts[0].strip()
                    # Provider name is whatever precedes "(" or "$"
                    provider_name = parts[1].split("(")[0].split("$")[0].strip().lower()
                    provider_map[provider_name] = num
                    provider_lines.append(line.strip())

    # Select provider based on our supported list
    supported_providers = ["datacrunch", "hyperstack"]
    provider_choice = "1"  # Default fallback
    for provider in supported_providers:
        if provider in provider_map:
            provider_choice = provider_map[provider]
            hud_console.info(f"Selected provider: {provider} (option {provider_choice})")
            break

    disk_size = ""  # Just press enter for default
    image_choice = "7"  # cuda_12_4_pytorch_2_5

    # Log what we're doing
    hud_console.debug("Pod creation configuration:")
    hud_console.debug(f" Team ID provided: {team_id}")
    hud_console.debug(f" Global team detected: {has_global_team}")

    # Each prompt answer is stored with its label so that the debug dump on
    # failure always describes exactly what was sent, whichever team branch
    # was taken.
    steps: list[tuple[str, str]] = [
        ("Provider selection", provider_choice),
        ("Disk size", disk_size),
        ("Image selection", image_choice),
    ]
    if team_id:
        # Explicit team ID provided, select Custom Team ID (option 3)
        team_choice = "3"
        steps.append(("Team selection", team_choice))
        steps.append(("Team ID (if custom)", team_id))
        hud_console.debug(f" Using explicit team ID: option {team_choice} with ID {team_id}")
    elif has_global_team:
        # When team is pre-configured, it shows as option 2 - select it
        team_choice = "2"
        steps.append(("Team selection", team_choice))
        hud_console.debug(f" Using pre-configured team: option {team_choice}")
    else:
        # Personal account (option 1) - empty answer accepts the default [1]
        team_choice = ""
        steps.append(("Team selection", team_choice))
        hud_console.debug(" Using personal account: default option [1]")
    # Confirmation must be a lowercase 'y' and come after team selection
    steps.append(("Confirmation", "y"))

    inputs = "".join(f"{answer}\n" for _, answer in steps)

    hud_console.debug(
        f" Input sequence: provider={provider_choice}, disk={disk_size or 'default'}, image={image_choice}, team={team_choice or 'default'}"  # noqa: E501
    )

    # Show found providers
    if provider_lines:
        hud_console.info("")
        hud_console.info("Found providers:")
        for pl in provider_lines[:5]:  # Show first 5
            hud_console.info(f" {pl}")

    try:
        console = Console()

        with Live(
            Spinner("dots", text="[bold]Creating pod...[/bold]", style="gold"),
            console=console,
            refresh_per_second=10,
        ):
            result = subprocess.run(  # noqa: S603, ASYNC221
                create_cmd,
                input=inputs,
                text=True,
                capture_output=True,
            )

        if result.returncode != 0:
            hud_console.error("Failed to create pod")

            # Parse output for better error reporting
            output_lines = result.stdout.strip().split("\n") if result.stdout else []

            # Look for provider prices
            for line in output_lines:
                if "$" in line and "/hr" in line:
                    hud_console.info(f"Provider option: {line.strip()}")

            # Check for team selection error
            if "invalid selection" in result.stdout.lower():
                hud_console.error("Team selection failed")
                # Find and display the team selection section
                for i, line in enumerate(output_lines):
                    if "Select Team:" in line:
                        hud_console.info("Team selection options:")
                        # Show the prompt line and the next few lines
                        for shown in output_lines[i : i + 6]:
                            hud_console.info(f" {shown}")
                        break

                hud_console.info("")
                hud_console.hint(
                    "The Prime CLI interface may have changed. Try running the command manually:"
                )
                hud_console.command_example(
                    f"prime pods create --gpu-type {gpu_type} --gpu-count {gpu_count} --name {pod_name}"  # noqa: E501
                )

            # Show error details
            if result.stderr:
                hud_console.error("Error output:")
                for line in result.stderr.strip().split("\n"):
                    hud_console.error(f" {line}")

            # Show last part of stdout for context
            if result.stdout:
                hud_console.info("Command output:")
                # Show last 15 lines for brevity
                for line in output_lines[-15:]:
                    hud_console.info(f" {line}")

            if "max_price" in str(result.stderr) or "max_price" in str(result.stdout):
                hud_console.warning("")
                hud_console.warning("The selected provider requires a maximum price limit.")
                hud_console.info("This is a known limitation with some providers.")
                hud_console.info("")
                hud_console.hint("Workarounds:")
                hud_console.info("1. Run the command manually and select a different provider")
                hud_console.info(
                    "2. Try again later when datacrunch (usually cheapest) is available"
                )
                hud_console.info("3. Use the Prime web interface: https://app.primeintellect.ai")

            hud_console.info("")
            hud_console.info("Debug info:")
            hud_console.info(f" Command: {' '.join(create_cmd)}")
            hud_console.info(f" Pod name: {pod_name}")
            hud_console.info(f" Team ID: {'Provided' if team_id else 'Not provided'}")
            hud_console.info(f" Global team detected: {has_global_team}")

            # Show the exact inputs we sent, each under the prompt it answered
            hud_console.info(" Inputs sent (in order):")
            for position, (label, answer) in enumerate(steps, start=1):
                if answer:
                    hud_console.info(f" {position}. {label}: '{answer}'")
                else:
                    hud_console.info(f" {position}. {label}: [Enter/default]")

            raise typer.Exit(1)

        # Extract pod ID from output
        pod_id = None
        for line in result.stdout.strip().split("\n"):
            if "Successfully created pod" in line:
                # Prefer a hex ID following the word "pod"; otherwise fall
                # back to the last whitespace-separated token on the line.
                match = re.search(r"pod\s+([a-f0-9]+)", line)
                pod_id = match.group(1) if match else line.split()[-1].strip()
                break

        if not pod_id:
            hud_console.error("Could not extract pod ID from output")
            hud_console.info(f"Output: {result.stdout}")
            raise typer.Exit(1)

        hud_console.success(f"Created pod: {pod_id}")

        # Poll for pod status
        ssh_info = await poll_pod_status(pod_id)

        if not ssh_info:
            hud_console.error("Pod failed to become active")
            raise typer.Exit(1)

        hud_console.success("Pod is ready!")
        hud_console.info(f"SSH: {ssh_info}")

        # Check if SSH key is configured globally
        ssh_key_configured = await check_and_configure_ssh_key()

        if ssh_key_configured:
            # Automatically connect and run training
            await connect_and_train(
                pod_id=pod_id,
                ssh_info=ssh_info,
                model=model,
                dataset=dataset,
                config=config,
                output_dir=output_dir,
                image=image,
                dataset_size=dataset_size,
                is_json_file=is_json_file,
            )
        else:
            # Manual fallback
            hud_console.section_title("📋 Manual Connection Required")
            hud_console.info("SSH key configuration failed. Connect manually:")
            hud_console.info("")
            hud_console.info("1. Download the SSH key from:")
            hud_console.info(" https://app.primeintellect.ai/dashboard/profile")
            hud_console.info("")
            hud_console.info("2. Set permissions:")
            hud_console.command_example("chmod 400 /path/to/prime-key.pem", "")
            hud_console.info("")
            hud_console.info("3. Connect to your instance:")
            hud_console.command_example(f"ssh -i /path/to/prime-key.pem {ssh_info}", "")
            hud_console.info("")
            hud_console.info("4. Run these commands:")
            hud_console.command_example("pip install verifiers hud-vf-gym", "")
            hud_console.command_example(f"prime env install {image}", "")

            # Build training command with env vars
            if settings.wandb_api_key:
                hud_console.command_example(
                    f"export WANDB_API_KEY={settings.wandb_api_key}", ""
                )

            train_cmd = f"""vf-train hud-vf-gym \\
--model {model} \\
--env-args '{{"taskset": "{dataset}", "config_path": "/root/config.yaml"}}' \\
--output-dir {output_dir} \\
--run-name hud-rl-{pod_id[:8]} \\
--wandb-project hud-rl"""

            hud_console.command_example(train_cmd, "")
            hud_console.info("")
            hud_console.warning(
                f"Remember to terminate when done: prime pods terminate {pod_id}"
            )

    except subprocess.CalledProcessError as e:
        hud_console.error(f"Failed to create pod: {e}")
        raise typer.Exit(1) from e
381
-
382
-
383
async def poll_pod_status(pod_id: str) -> str | None:
    """Poll pod status until SSH is available.

    Runs ``prime pods status <pod_id>`` every 10 seconds, for at most 120
    attempts, and parses the table output for the ``Status`` and ``SSH``
    rows. A spinner shows the latest status while waiting.

    Args:
        pod_id: Identifier of the pod to query.

    Returns:
        The value of the ``SSH`` row once it is non-empty and not ``"N/A"``,
        or ``None`` if that never happens within the attempt budget.
    """
    # Imported locally so this coroutine can yield to the event loop between
    # polls without depending on a module-level import.
    import asyncio

    console = Console()
    poll_interval = 10  # seconds between status checks
    max_attempts = 120  # 20 minutes with 10s intervals

    # Create spinner
    spinner = Spinner(
        "dots", text="Waiting for pod to become active (should take 5-20 min)...", style="gold"
    )

    with Live(spinner, console=console, refresh_per_second=10) as live:
        for attempt in range(max_attempts):
            try:
                result = subprocess.run(  # noqa: S603, ASYNC221
                    ["prime", "pods", "status", pod_id],  # noqa: S607
                    capture_output=True,
                    text=True,
                )

                if result.returncode == 0:
                    elapsed_minutes = (attempt * poll_interval) // 60

                    # Parse status - look for rows keyed "Status" and "SSH"
                    status_value = None
                    ssh_value = None
                    for line in result.stdout.split("\n"):
                        # Handle both box-drawing chars │ and regular pipes |
                        if "│" in line:
                            separator = "│"
                        elif "|" in line:
                            separator = "|"
                        else:
                            continue

                        parts = [p.strip() for p in line.split(separator)]
                        if len(parts) >= 3:
                            # parts[0] is whatever precedes the first
                            # separator; the key and value cells follow it.
                            key, value = parts[1], parts[2]
                            if key == "Status":
                                status_value = value
                            elif key == "SSH":
                                ssh_value = value

                    # Update spinner text with current status
                    if status_value:
                        ssh_status = f" | SSH: {ssh_value}" if ssh_value else ""
                        spinner.text = f"Pod status: {status_value} ({elapsed_minutes}m elapsed, should take 5-20 min){ssh_status}"  # noqa: E501

                    # Check if SSH is available (and not N/A)
                    if ssh_value and ssh_value != "N/A":
                        # Stop the spinner before logging
                        live.stop()
                        hud_console.success(f"SSH is available: {ssh_value}")
                        return ssh_value

            except Exception as e:  # noqa: BLE001 - best effort: report and retry
                spinner.text = f"[bold red]Status check failed: {e}[/bold red]"

            # Non-blocking wait: a plain time.sleep() here would stall every
            # other task on the event loop for the whole polling period.
            await asyncio.sleep(poll_interval)

    # Spinner is done, now we can use hud_console.error
    hud_console.error("Timeout: Pod did not become ready within 20 minutes")
    return None
456
-
457
-
458
async def run_prime_training(
    model: str,
    dataset: str,
    config: Path,
    gpus: str,
    output_dir: Path,
    image: str,
    auto_create_pod: str | None = None,
    team_id: str | None = None,
    dataset_size: int | None = None,
    is_json_file: bool = False,
) -> None:
    """Run training on Prime Intellect infrastructure.

    Verifies that a Prime API key is configured, parses the GPU spec,
    generates a short pod name and delegates to
    ``create_and_connect_prime_pod``.

    Args:
        model: Model identifier; its last ``/``-separated component is used
            in the pod name.
        dataset: Dataset identifier forwarded to pod creation.
        config: Config path forwarded to pod creation.
        gpus: GPU spec such as ``"2xA100"``, parsed by ``parse_gpu_config``.
        output_dir: Output directory forwarded to pod creation.
        image: Environment name forwarded to pod creation.
        auto_create_pod: Accepted for interface compatibility; not read here,
            a pod is always created.
        team_id: Optional explicit team ID forwarded to pod creation.
        dataset_size: Forwarded to pod creation.
        is_json_file: Forwarded to pod creation.

    Raises:
        typer.Exit: If no Prime API key is configured.
    """
    # Check API key
    if not settings.prime_api_key:
        hud_console.error("Prime API key not found")
        hud_console.info("Set your Prime API key:")
        hud_console.info(" export PRIME_API_KEY='your-api-key'")
        hud_console.info(" # or")
        hud_console.info(" prime auth")
        raise typer.Exit(1)

    # Parse GPU configuration
    gpu_count, gpu_type = parse_gpu_config(gpus)

    # Generate short pod name (no dots allowed), kept to 30 characters.
    # The model part is truncated *before* the random suffix is appended, so
    # a long model name can no longer push the uniqueness suffix out of the
    # name or leave it ending in a dash.
    max_name_len = 30
    name_prefix = "hud-rl"
    short_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=4))  # noqa: S311
    # Budget for the model part: total minus prefix, suffix and two dashes.
    model_budget = max_name_len - len(name_prefix) - len(short_id) - 2
    model_suffix = model.split("/")[-1].replace(".", "-").lower()[:model_budget].rstrip("-")
    pod_name = f"{name_prefix}-{model_suffix}-{short_id}"

    # Always create pod automatically
    await create_and_connect_prime_pod(
        pod_name=pod_name,
        gpu_type=gpu_type,
        gpu_count=gpu_count,
        model=model,
        dataset=dataset,
        config=config,
        output_dir=output_dir,
        image=image,
        team_id=team_id,
        dataset_size=dataset_size,
        is_json_file=is_json_file,
    )