@synsci/cli-darwin-x64 1.1.58 → 1.1.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/grpo-rl-training/README.md +1 -1
- package/bin/skills/hugging-face-evaluation/examples/.env.example +7 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +0 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +0 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +0 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +0 -0
- package/bin/skills/hugging-face-trackio/.claude-plugin/plugin.json +19 -0
- package/bin/skills/modal/SKILL.md +316 -275
- package/bin/skills/modal/references/advanced-patterns.md +598 -0
- package/bin/skills/modal/references/examples-catalog.md +423 -0
- package/bin/skills/prime-intellect-lab/README.md +69 -0
- package/bin/skills/prime-intellect-lab/SKILL.md +598 -0
- package/bin/skills/prime-intellect-lab/templates/basic_rl_training.toml +82 -0
- package/bin/skills/tensorpool/SKILL.md +519 -0
- package/bin/synsc +0 -0
- package/package.json +1 -1
- package/bin/skills/modal/references/advanced-usage.md +0 -503
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prime-intellect-lab
|
|
3
|
+
description: Expert guidance for hosted RL post-training with Prime Intellect Lab — environments, verifiers, GEPA prompt optimization, and agentic training
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Synthetic Sciences
|
|
6
|
+
license: MIT
|
|
7
|
+
tags: [Post-Training, Reinforcement Learning, Prime Intellect, Lab, Hosted Training, Environments, Verifiers, LoRA, Agentic RL, GEPA]
|
|
8
|
+
dependencies: [prime, verifiers]
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Prime Intellect Lab — Hosted RL Post-Training
|
|
12
|
+
|
|
13
|
+
Expert-level guidance for running reinforcement learning post-training on Prime Intellect's hosted platform. Prime Intellect Lab handles GPU infrastructure, orchestration, and evaluation — you focus on environments, reward signals, and model selection.
|
|
14
|
+
|
|
15
|
+
**Note:** Hosted Training is currently in **Private Beta**. Apply for access at [primeintellect.ai](https://www.primeintellect.ai) if needed.
|
|
16
|
+
|
|
17
|
+
## When to Use This Skill
|
|
18
|
+
|
|
19
|
+
**Use Prime Intellect Lab when you need to:**
|
|
20
|
+
- Run hosted GRPO/RL training with managed GPU infrastructure
|
|
21
|
+
- Train with **environments** (dataset + harness + rubric) for verifiable rewards
|
|
22
|
+
- Do **agentic multi-turn training** (tool-use, code execution, web browsing)
|
|
23
|
+
- Apply LoRA on open-weight models (Qwen3, Llama, INTELLECT-3)
|
|
24
|
+
- Use pre-built environments from the Environments Hub (math, code, reasoning, agentic)
|
|
25
|
+
- Run **GEPA prompt optimization** — automatic system prompt refinement without gradient training
|
|
26
|
+
- Use **bundled agent skills** (brainstorm, create, browse, review, eval, train)
|
|
27
|
+
|
|
28
|
+
**Do NOT use Prime Intellect Lab for:**
|
|
29
|
+
- Supervised fine-tuning (SFT) — use **Tinker** instead
|
|
30
|
+
- Local GPU training — use Axolotl, Unsloth, or TRL directly
|
|
31
|
+
- Custom model architectures not in Prime Intellect's supported list
|
|
32
|
+
- Inference serving or deployment — use vLLM, SGLang, etc.
|
|
33
|
+
|
|
34
|
+
### Decision Matrix
|
|
35
|
+
|
|
36
|
+
| Task | Platform |
|
|
37
|
+
|------|----------|
|
|
38
|
+
| SFT / LoRA fine-tuning | Tinker (default) |
|
|
39
|
+
| Hosted RL with environments | **Prime Intellect Lab** |
|
|
40
|
+
| Agentic multi-turn RL | **Prime Intellect Lab** |
|
|
41
|
+
| GEPA prompt optimization | **Prime Intellect Lab** |
|
|
42
|
+
| Local RL with custom rewards | GRPO skill + TRL |
|
|
43
|
+
| On-demand GPU clusters | TensorPool |
|
|
44
|
+
| Custom compute (serverless) | Modal / Lambda |
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Core Concepts
|
|
49
|
+
|
|
50
|
+
### 1. Environments
|
|
51
|
+
|
|
52
|
+
An **environment** in Prime Intellect Lab combines:
|
|
53
|
+
- **Dataset**: The prompts/problems to train on
|
|
54
|
+
- **Harness**: Execution sandbox (code runner, tool-use framework, etc.)
|
|
55
|
+
- **Rubric**: Reward function that scores model outputs (0.0 to 1.0)
|
|
56
|
+
|
|
57
|
+
Environments are the fundamental unit of training. Each environment defines what the model practices and how it's evaluated. Environments are identified as `owner/name` (e.g., `primeintellect/alphabet-sort`).
|
|
58
|
+
|
|
59
|
+
### 2. Hosted Training Architecture
|
|
60
|
+
|
|
61
|
+
Prime Intellect's `prime rl run` orchestrates three components:
|
|
62
|
+
- **Trainer**: Runs the RL optimization (GRPO) with LoRA adapters
|
|
63
|
+
- **Inference**: Generates rollouts (model completions) at scale
|
|
64
|
+
- **Orchestrator**: Coordinates data flow between trainer and inference
|
|
65
|
+
|
|
66
|
+
You don't manage these directly — `prime rl run` handles everything.
|
|
67
|
+
|
|
68
|
+
### 3. Environments Hub
|
|
69
|
+
|
|
70
|
+
Pre-built environments available on the platform:
|
|
71
|
+
- **Math**: GSM8K, MATH, competition math
|
|
72
|
+
- **Code**: HumanEval, MBPP, SWE-bench subsets
|
|
73
|
+
- **Reasoning**: ARC, logic puzzles, alphabet-sort, reverse-text, wordle
|
|
74
|
+
- **Agentic**: Tool-use, wiki-search, multi-step tasks
|
|
75
|
+
|
|
76
|
+
Browse and install environments with `prime env list` and `prime env install`.
|
|
77
|
+
|
|
78
|
+
### 4. Verifiers Library
|
|
79
|
+
|
|
80
|
+
The `verifiers` Python library provides building blocks for custom environments:
|
|
81
|
+
- Rubric functions (exact match, code execution, LLM-as-judge)
|
|
82
|
+
- Harness wrappers (sandboxed code execution, tool-use)
|
|
83
|
+
- Dataset adapters (HuggingFace datasets, custom formats)
|
|
84
|
+
- Install with `pip install verifiers`
|
|
85
|
+
|
|
86
|
+
### 5. GEPA — Prompt Optimization
|
|
87
|
+
|
|
88
|
+
**GEPA** (Genetic-Pareto prompt optimization) is a gradient-free method for refining environment system prompts:
|
|
89
|
+
- Uses a teacher LLM to reflect on evaluation results
|
|
90
|
+
- Iteratively evolves the system prompt for better scores
|
|
91
|
+
- No training required — pure prompt-level optimization
|
|
92
|
+
- Run via `prime gepa run configs/gepa/base.toml`
|
|
93
|
+
|
|
94
|
+
### 6. Lab Agent Skills
|
|
95
|
+
|
|
96
|
+
When you run `prime lab setup`, bundled workflow skills are installed at `.prime/skills/`:
|
|
97
|
+
|
|
98
|
+
| Skill | Purpose |
|
|
99
|
+
|-------|---------|
|
|
100
|
+
| **brainstorm** | Ideation and planning for training experiments |
|
|
101
|
+
| **create** | Create new custom environments |
|
|
102
|
+
| **browse** | Browse existing environments and resources |
|
|
103
|
+
| **review** | Review environment code and configurations |
|
|
104
|
+
| **eval** | Run evaluations and benchmark models |
|
|
105
|
+
| **train** | Launch and manage training runs |
|
|
106
|
+
| **GEPA** | Automatic prompt optimization workflows |
|
|
107
|
+
|
|
108
|
+
These skills provide agent-friendly workflows that the `synsc` CLI can invoke.
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Setup
|
|
113
|
+
|
|
114
|
+
### Installation
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Install uv (if not already installed)
|
|
118
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
119
|
+
|
|
120
|
+
# Install the Prime CLI
|
|
121
|
+
uv tool install prime
|
|
122
|
+
|
|
123
|
+
# Authenticate
|
|
124
|
+
prime login
|
|
125
|
+
|
|
126
|
+
# Or manually set API key
|
|
127
|
+
prime config set-api-key
|
|
128
|
+
|
|
129
|
+
# Configure SSH key for pod access (optional)
|
|
130
|
+
prime config set-ssh-key-path
|
|
131
|
+
|
|
132
|
+
# Verify setup
|
|
133
|
+
prime config view
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Workspace Setup
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# Create and enter a workspace directory
|
|
140
|
+
mkdir ~/dev/my-lab && cd ~/dev/my-lab
|
|
141
|
+
|
|
142
|
+
# Initialize the full Lab workspace
|
|
143
|
+
prime lab setup
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
This creates:
|
|
147
|
+
```
|
|
148
|
+
configs/
|
|
149
|
+
endpoints.toml # OpenAI-compatible API endpoint config
|
|
150
|
+
rl/ # Example RL training configs
|
|
151
|
+
alphabet-sort.toml
|
|
152
|
+
gsm8k.toml
|
|
153
|
+
math-python.toml
|
|
154
|
+
reverse-text.toml
|
|
155
|
+
wiki-search.toml
|
|
156
|
+
wordle.toml
|
|
157
|
+
eval/ # Example eval configs
|
|
158
|
+
minimal.toml
|
|
159
|
+
multi-env.toml
|
|
160
|
+
gepa/ # GEPA prompt optimization configs
|
|
161
|
+
base.toml
|
|
162
|
+
wordle.toml
|
|
163
|
+
environments/
|
|
164
|
+
AGENTS.md # Documentation for AI coding agents
|
|
165
|
+
.prime/
|
|
166
|
+
skills/ # Bundled workflow skills (brainstorm, create, etc.)
|
|
167
|
+
AGENTS.md # Top-level agent documentation
|
|
168
|
+
CLAUDE.md # Claude-specific pointer to AGENTS.md
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### For self-hosted training with prime-rl:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
prime lab setup --prime-rl
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
This additionally clones the `prime-rl` trainer and sets up dependencies.
|
|
178
|
+
|
|
179
|
+
### Verify Credentials
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# Check if PRIME_API_KEY is set
|
|
183
|
+
[ -n "$PRIME_API_KEY" ] && echo "set" || echo "not set"
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
If connected via the Synthetic Sciences dashboard, `PRIME_API_KEY` is injected automatically.
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Training Workflow
|
|
191
|
+
|
|
192
|
+
### Step 1: Install an Environment
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
# List available environments
|
|
196
|
+
prime env list
|
|
197
|
+
|
|
198
|
+
# Install an environment into your workspace
|
|
199
|
+
prime env install primeintellect/alphabet-sort
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Step 2: Run Baseline Evaluation
|
|
203
|
+
|
|
204
|
+
Before training, establish a baseline:
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
prime eval run primeintellect/alphabet-sort \
|
|
208
|
+
-m Qwen/Qwen3-4B-Instruct-2507 \
|
|
209
|
+
-n 20 \
|
|
210
|
+
-r 1
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Step 3: Configure Training
|
|
214
|
+
|
|
215
|
+
Example `configs/rl/alphabet-sort.toml`:
|
|
216
|
+
|
|
217
|
+
```toml
|
|
218
|
+
model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
|
|
219
|
+
max_steps = 500
|
|
220
|
+
batch_size = 256
|
|
221
|
+
rollouts_per_example = 8
|
|
222
|
+
|
|
223
|
+
[sampling]
|
|
224
|
+
max_tokens = 512
|
|
225
|
+
|
|
226
|
+
[[env]]
|
|
227
|
+
id = "primeintellect/alphabet-sort"
|
|
228
|
+
args = { min_turns = 3, max_turns = 5, power_per_turn = false }
|
|
229
|
+
|
|
230
|
+
[wandb]
|
|
231
|
+
project = "alphabet-sort"
|
|
232
|
+
name = "qwen3-30b-i-alphabet-sort"
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Step 4: Launch Training
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
# Hosted Training (managed infrastructure)
|
|
239
|
+
prime rl run configs/rl/alphabet-sort.toml
|
|
240
|
+
|
|
241
|
+
# Or self-hosted with prime-rl (on your own GPUs)
|
|
242
|
+
uv run prime-rl configs/prime-rl/wiki-search.toml
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Step 5: Monitor Progress
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
# Check run status
|
|
249
|
+
prime rl status
|
|
250
|
+
|
|
251
|
+
# Stream logs
|
|
252
|
+
prime rl logs --follow
|
|
253
|
+
|
|
254
|
+
# View on W&B dashboard (if enabled)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Step 6: Review Results
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
# List completed runs
|
|
261
|
+
prime rl list
|
|
262
|
+
|
|
263
|
+
# Download LoRA adapter
|
|
264
|
+
prime rl download <run-id> --output ./lora-adapter
|
|
265
|
+
|
|
266
|
+
# Run post-training evaluation
|
|
267
|
+
prime eval run primeintellect/alphabet-sort \
|
|
268
|
+
-m Qwen/Qwen3-30B-A3B-Instruct-2507 \
|
|
269
|
+
--adapter ./lora-adapter \
|
|
270
|
+
-n 100
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
## Configuration Reference
|
|
276
|
+
|
|
277
|
+
Full `.toml` config fields:
|
|
278
|
+
|
|
279
|
+
```toml
|
|
280
|
+
# Top-level fields
|
|
281
|
+
model = "Qwen/Qwen3-4B-Instruct-2507" # Model from supported list (required)
|
|
282
|
+
max_steps = 200 # Total training steps
|
|
283
|
+
batch_size = 16 # Prompts per batch
|
|
284
|
+
rollouts_per_example = 8 # Completions per prompt (GRPO group size)
|
|
285
|
+
|
|
286
|
+
[sampling]
|
|
287
|
+
max_tokens = 2048 # Max output tokens per rollout
|
|
288
|
+
temperature = 0.7 # Sampling temperature for rollouts
|
|
289
|
+
top_p = 0.95 # Nucleus sampling
|
|
290
|
+
|
|
291
|
+
# Environments — use [[env]] (double bracket) for array of environments
|
|
292
|
+
[[env]]
|
|
293
|
+
id = "primeintellect/alphabet-sort" # Environment ID (required)
|
|
294
|
+
args = { min_turns = 3, max_turns = 5 } # Environment-specific arguments
|
|
295
|
+
|
|
296
|
+
# For multi-environment training, add more [[env]] blocks:
|
|
297
|
+
# [[env]]
|
|
298
|
+
# id = "primeintellect/gsm8k"
|
|
299
|
+
# weight = 0.3
|
|
300
|
+
|
|
301
|
+
[wandb]
|
|
302
|
+
project = "my-project" # W&B project name
|
|
303
|
+
name = "run-name" # W&B run name
|
|
304
|
+
enabled = true # Enable W&B logging
|
|
305
|
+
|
|
306
|
+
[eval]
|
|
307
|
+
interval = 50 # Eval every N steps
|
|
308
|
+
n_samples = 100 # Samples per eval
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
**Important:** Use `[[env]]` (double brackets) for environment config — this is TOML array-of-tables syntax.
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
## Available Models
|
|
316
|
+
|
|
317
|
+
| Model | Type | Recommended Use |
|
|
318
|
+
|-------|------|-----------------|
|
|
319
|
+
| `Qwen/Qwen3-4B-Instruct-2507` | Instruct | Quick iteration, prototyping |
|
|
320
|
+
| `Qwen/Qwen3-4B-Thinking-2507` | Thinking | Reasoning-focused training |
|
|
321
|
+
| `Qwen/Qwen3-30B-A3B-Instruct-2507` | Instruct (MoE) | Strong general purpose |
|
|
322
|
+
| `Qwen/Qwen3-30B-A3B-Thinking-2507` | Thinking (MoE) | Reasoning at scale |
|
|
323
|
+
| `Qwen/Qwen3-235B-A22B-Instruct-2507` | Instruct (MoE) | Frontier-level, agentic tasks |
|
|
324
|
+
| `Qwen/Qwen3-235B-A22B-Thinking-2507` | Thinking (MoE) | Frontier reasoning |
|
|
325
|
+
| `PrimeIntellect/INTELLECT-3` | — | Prime Intellect's own model |
|
|
326
|
+
|
|
327
|
+
Check the latest supported models:
|
|
328
|
+
```bash
|
|
329
|
+
prime models list
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
---
|
|
333
|
+
|
|
334
|
+
## GEPA Prompt Optimization
|
|
335
|
+
|
|
336
|
+
GEPA (Genetic-Pareto prompt optimization) refines your environment's system prompt without gradient-based training:
|
|
337
|
+
|
|
338
|
+
```bash
|
|
339
|
+
# Run GEPA optimization
|
|
340
|
+
prime gepa run configs/gepa/base.toml
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
Example GEPA config:
|
|
344
|
+
```toml
|
|
345
|
+
environment = "primeintellect/wordle"
|
|
346
|
+
model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
|
|
347
|
+
teacher_model = "Qwen/Qwen3-235B-A22B-Instruct-2507"
|
|
348
|
+
generations = 10
|
|
349
|
+
population_size = 8
|
|
350
|
+
n_eval_samples = 50
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
**How it works:**
|
|
354
|
+
1. Evaluates current system prompt against the environment
|
|
355
|
+
2. Teacher LLM reflects on failures and proposes improved prompts
|
|
356
|
+
3. Genetic algorithm evolves a population of prompt variants
|
|
357
|
+
4. Pareto-optimal prompts are selected across multiple objectives
|
|
358
|
+
5. Best prompt is saved after N generations
|
|
359
|
+
|
|
360
|
+
---
|
|
361
|
+
|
|
362
|
+
## Environment Development
|
|
363
|
+
|
|
364
|
+
### Building Custom Environments with `verifiers`
|
|
365
|
+
|
|
366
|
+
```bash
|
|
367
|
+
pip install verifiers
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Example: Custom Environment
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
# environments/my_math_env.py
|
|
374
|
+
from verifiers import Environment, Rubric
|
|
375
|
+
|
|
376
|
+
class MyMathEnv(Environment):
|
|
377
|
+
name = "my-org/math-problems"
|
|
378
|
+
|
|
379
|
+
def get_dataset(self):
|
|
380
|
+
from datasets import load_dataset
|
|
381
|
+
ds = load_dataset("openai/gsm8k", "main", split="train")
|
|
382
|
+
return [{"prompt": ex["question"], "reference": ex["answer"]} for ex in ds]
|
|
383
|
+
|
|
384
|
+
def get_rubric(self):
|
|
385
|
+
def score(output: str, reference: str) -> float:
|
|
386
|
+
try:
|
|
387
|
+
pred = float(output.strip().split("####")[-1].strip())
|
|
388
|
+
gold = float(reference.strip().split("####")[-1].strip())
|
|
389
|
+
return 1.0 if abs(pred - gold) < 1e-6 else 0.0
|
|
390
|
+
except (ValueError, IndexError):
|
|
391
|
+
return 0.0
|
|
392
|
+
return Rubric(score_fn=score)
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
### Register and Use
|
|
396
|
+
|
|
397
|
+
```bash
|
|
398
|
+
# Install custom environment
|
|
399
|
+
prime env install ./environments/my_math_env.py
|
|
400
|
+
|
|
401
|
+
# Use in training config
|
|
402
|
+
# [[env]]
|
|
403
|
+
# id = "my-org/math-problems"
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
---
|
|
407
|
+
|
|
408
|
+
## Multi-Environment Training
|
|
409
|
+
|
|
410
|
+
Train on multiple environments by adding multiple `[[env]]` blocks:
|
|
411
|
+
|
|
412
|
+
```toml
|
|
413
|
+
model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
|
|
414
|
+
max_steps = 500
|
|
415
|
+
batch_size = 256
|
|
416
|
+
rollouts_per_example = 8
|
|
417
|
+
|
|
418
|
+
[[env]]
|
|
419
|
+
id = "primeintellect/gsm8k"
|
|
420
|
+
weight = 0.5
|
|
421
|
+
|
|
422
|
+
[[env]]
|
|
423
|
+
id = "primeintellect/alphabet-sort"
|
|
424
|
+
weight = 0.3
|
|
425
|
+
|
|
426
|
+
[[env]]
|
|
427
|
+
id = "primeintellect/reverse-text"
|
|
428
|
+
weight = 0.2
|
|
429
|
+
|
|
430
|
+
[sampling]
|
|
431
|
+
max_tokens = 512
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
---
|
|
435
|
+
|
|
436
|
+
## Compute API (GPU Pods)
|
|
437
|
+
|
|
438
|
+
Prime Intellect also provides direct GPU provisioning via the Compute API:
|
|
439
|
+
|
|
440
|
+
```bash
|
|
441
|
+
# Check GPU availability
|
|
442
|
+
prime compute availability
|
|
443
|
+
|
|
444
|
+
# Provision a GPU pod
|
|
445
|
+
prime compute provision --gpu H100 --count 8
|
|
446
|
+
|
|
447
|
+
# List running pods
|
|
448
|
+
prime compute list
|
|
449
|
+
|
|
450
|
+
# SSH into a pod
|
|
451
|
+
prime compute ssh <pod-id>
|
|
452
|
+
|
|
453
|
+
# Delete a pod
|
|
454
|
+
prime compute delete <pod-id>
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
API endpoints (Bearer token auth via `PRIME_API_KEY`):
|
|
458
|
+
- `GET /api/v1/availability/gpus` — Check availability
|
|
459
|
+
- `POST /api/v1/provision-gpu` — Provision instances
|
|
460
|
+
- `GET /api/v1/managing-pods` — List pods
|
|
461
|
+
- `DELETE /api/v1/managing-pods/{pod_id}` — Delete pod
|
|
462
|
+
- `POST /api/v1/sandbox/create-sandbox-endpoint` — Create sandbox
|
|
463
|
+
|
|
464
|
+
---
|
|
465
|
+
|
|
466
|
+
## Troubleshooting
|
|
467
|
+
|
|
468
|
+
### Common Issues
|
|
469
|
+
|
|
470
|
+
**1. `ModuleNotFoundError: No module named 'prime'`**
|
|
471
|
+
```bash
|
|
472
|
+
# Install via uv (recommended)
|
|
473
|
+
uv tool install prime
|
|
474
|
+
# Or in current environment
|
|
475
|
+
pip install prime
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
**2. Authentication failed**
|
|
479
|
+
```bash
|
|
480
|
+
# Re-authenticate
|
|
481
|
+
prime login
|
|
482
|
+
# Or manually set key
|
|
483
|
+
prime config set-api-key
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
**3. Reward stuck at 0.0**
|
|
487
|
+
- Test rubric independently: `prime eval run <env> -m <model> -n 10`
|
|
488
|
+
- Verify the model can produce valid outputs for the task
|
|
489
|
+
- Try increasing `sampling.temperature`
|
|
490
|
+
- Check environment `args` are correct
|
|
491
|
+
|
|
492
|
+
**4. Reward stuck at 1.0**
|
|
493
|
+
- Task is too easy — use a harder environment or add more constraints
|
|
494
|
+
- Check rubric isn't always returning 1.0
|
|
495
|
+
|
|
496
|
+
**5. `pydantic` version errors**
|
|
497
|
+
```bash
|
|
498
|
+
# Prime uses pydantic v2 — create a clean environment
|
|
499
|
+
python3.12 -m venv ~/prime-env && source ~/prime-env/bin/activate
|
|
500
|
+
pip install prime verifiers
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
**6. Model not available**
|
|
504
|
+
```bash
|
|
505
|
+
# Check supported models
|
|
506
|
+
prime models list
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
**7. Training OOM (Out of Memory)**
|
|
510
|
+
- Reduce `batch_size` or `rollouts_per_example`
|
|
511
|
+
- Reduce `sampling.max_tokens`
|
|
512
|
+
- Use a smaller model for initial experiments
|
|
513
|
+
|
|
514
|
+
**8. Run stuck in "pending" state**
|
|
515
|
+
```bash
|
|
516
|
+
prime rl status --verbose
|
|
517
|
+
prime rl cancel <run-id>
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
**9. Environment args not taking effect**
|
|
521
|
+
- Ensure you use `[[env]]` (double brackets), not `[env]`
|
|
522
|
+
- Args must match the environment's expected parameters
|
|
523
|
+
|
|
524
|
+
---
|
|
525
|
+
|
|
526
|
+
## Agent Usage Instructions
|
|
527
|
+
|
|
528
|
+
When the `synsc` agent loads this skill for a user task:
|
|
529
|
+
|
|
530
|
+
1. **Check credentials**: Verify `PRIME_API_KEY` is set
|
|
531
|
+
2. **Set up workspace**: `prime lab setup` if not already initialized
|
|
532
|
+
3. **Select environment**: Use `prime env list` to find matching environments, install with `prime env install`
|
|
533
|
+
4. **Always run baseline eval**: Before training, establish performance with `prime eval run`
|
|
534
|
+
5. **Start small**: Use `Qwen/Qwen3-4B-Instruct-2507` with `max_steps=50` first
|
|
535
|
+
6. **Estimate cost**: Check `prime rl estimate --config <config.toml>` before launching
|
|
536
|
+
7. **Wait for approval**: Present cost estimate and get explicit user approval
|
|
537
|
+
8. **Monitor training**: Use `prime rl logs --follow` to track progress
|
|
538
|
+
9. **Report usage**: After completion, report via `SynSci.reportUsage()` with `service="primeintellect"`
|
|
539
|
+
|
|
540
|
+
### Example Agent Workflow
|
|
541
|
+
|
|
542
|
+
```
|
|
543
|
+
User: "Train a model to solve math problems using RL"
|
|
544
|
+
|
|
545
|
+
Agent steps:
|
|
546
|
+
1. Load skills: prime-intellect-lab, grpo-rl-training
|
|
547
|
+
2. Check PRIME_API_KEY is set
|
|
548
|
+
3. Set up workspace: mkdir ~/dev/math-rl && cd ~/dev/math-rl && prime lab setup
|
|
549
|
+
4. Install env: prime env install primeintellect/gsm8k
|
|
550
|
+
5. Baseline eval: prime eval run primeintellect/gsm8k -m Qwen/Qwen3-4B-Instruct-2507 -n 20 -r 1
|
|
551
|
+
6. Create config TOML with [[env]] for gsm8k
|
|
552
|
+
7. Estimate cost: prime rl estimate --config configs/rl/gsm8k.toml
|
|
553
|
+
8. Present estimate to user, wait for approval
|
|
554
|
+
9. Launch: prime rl run configs/rl/gsm8k.toml
|
|
555
|
+
10. Monitor: prime rl logs --follow
|
|
556
|
+
11. Download adapter and run final eval
|
|
557
|
+
12. Report usage to SynSci
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
### Using the Brainstorm Skill
|
|
561
|
+
|
|
562
|
+
For exploratory tasks, use the brainstorm agent skill:
|
|
563
|
+
```
|
|
564
|
+
User: "Help me figure out the best approach for RL training on code tasks"
|
|
565
|
+
|
|
566
|
+
Agent steps:
|
|
567
|
+
1. Load prime-intellect-lab skill
|
|
568
|
+
2. Set up workspace with prime lab setup
|
|
569
|
+
3. The brainstorm skill in .prime/skills/ provides structured ideation
|
|
570
|
+
4. Browse available code environments: prime env list
|
|
571
|
+
5. Propose experiment plan with environment selection, model choice, config
|
|
572
|
+
6. Run small-scale experiments to validate approach
|
|
573
|
+
```
|
|
574
|
+
|
|
575
|
+
---
|
|
576
|
+
|
|
577
|
+
## Quick Reference
|
|
578
|
+
|
|
579
|
+
| Command | Description |
|
|
580
|
+
|---------|-------------|
|
|
581
|
+
| `prime login` | Authenticate with Prime Intellect |
|
|
582
|
+
| `prime config view` | Show current configuration |
|
|
583
|
+
| `prime config set-api-key` | Manually set API key |
|
|
584
|
+
| `prime models list` | List supported models |
|
|
585
|
+
| `prime env list` | List available environments |
|
|
586
|
+
| `prime env install <id>` | Install environment to workspace |
|
|
587
|
+
| `prime eval run <env> -m <model>` | Run evaluation |
|
|
588
|
+
| `prime rl run <config.toml>` | Launch hosted RL training |
|
|
589
|
+
| `prime rl status` | Check run status |
|
|
590
|
+
| `prime rl logs --follow` | Stream training logs |
|
|
591
|
+
| `prime rl list` | List completed runs |
|
|
592
|
+
| `prime rl download <id>` | Download LoRA adapter |
|
|
593
|
+
| `prime rl cancel <id>` | Cancel a run |
|
|
594
|
+
| `prime gepa run <config.toml>` | Run GEPA prompt optimization |
|
|
595
|
+
| `prime lab setup` | Initialize Lab workspace |
|
|
596
|
+
| `prime lab setup --prime-rl` | Set up self-hosted prime-rl |
|
|
597
|
+
| `prime compute availability` | Check GPU availability |
|
|
598
|
+
| `prime compute provision` | Provision GPU pods |
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Prime Intellect Lab — RL Training Configuration
|
|
2
|
+
# Copy this file to configs/rl/my-run.toml and customize for your task.
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# prime rl run configs/rl/my-run.toml
|
|
6
|
+
#
|
|
7
|
+
# IMPORTANT: Use [[env]] (double brackets) for environments — this is TOML
|
|
8
|
+
# array-of-tables syntax. Using [env] (single bracket) will NOT work.
|
|
9
|
+
#
|
|
10
|
+
# Three size presets below. Uncomment ONE set of top-level fields.
|
|
11
|
+
|
|
12
|
+
# =============================================================================
|
|
13
|
+
# Model Selection
|
|
14
|
+
# =============================================================================
|
|
15
|
+
# Run `prime models list` for the full supported list.
|
|
16
|
+
model = "Qwen/Qwen3-4B-Instruct-2507"
|
|
17
|
+
# Other options:
|
|
18
|
+
# "Qwen/Qwen3-4B-Thinking-2507" — Reasoning-focused (thinking mode)
|
|
19
|
+
#   "Qwen/Qwen3-30B-A3B-Instruct-2507" — Strong general purpose (MoE)
|
|
20
|
+
#   "Qwen/Qwen3-30B-A3B-Thinking-2507" — Reasoning at scale (MoE)
|
|
21
|
+
#   "Qwen/Qwen3-235B-A22B-Instruct-2507" — Frontier-level (MoE)
|
|
22
|
+
#   "Qwen/Qwen3-235B-A22B-Thinking-2507" — Frontier reasoning (MoE)
|
|
23
|
+
# "PrimeIntellect/INTELLECT-3" — Prime Intellect's own model
|
|
24
|
+
|
|
25
|
+
# =============================================================================
|
|
26
|
+
# SMALL RUN — Quick iteration / prototyping (~30 min, low cost)
|
|
27
|
+
# =============================================================================
|
|
28
|
+
max_steps = 50
|
|
29
|
+
batch_size = 8
|
|
30
|
+
rollouts_per_example = 4
|
|
31
|
+
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# MEDIUM RUN — Solid training (~2-4 hours)
|
|
34
|
+
# Uncomment below and comment out SMALL RUN above to use.
|
|
35
|
+
# =============================================================================
|
|
36
|
+
# max_steps = 200
|
|
37
|
+
# batch_size = 16
|
|
38
|
+
# rollouts_per_example = 8
|
|
39
|
+
|
|
40
|
+
# =============================================================================
|
|
41
|
+
# LARGE RUN — Full training (~8-24 hours)
|
|
42
|
+
# Uncomment below and comment out SMALL RUN above to use.
|
|
43
|
+
# =============================================================================
|
|
44
|
+
# max_steps = 1000
|
|
45
|
+
# batch_size = 32
|
|
46
|
+
# rollouts_per_example = 16
|
|
47
|
+
|
|
48
|
+
# =============================================================================
|
|
49
|
+
# Sampling — controls rollout generation
|
|
50
|
+
# =============================================================================
|
|
51
|
+
[sampling]
|
|
52
|
+
temperature = 0.7 # Higher = more exploration (0.6-1.0)
|
|
53
|
+
top_p = 0.95
|
|
54
|
+
max_tokens = 2048 # Max output tokens per rollout
|
|
55
|
+
|
|
56
|
+
# =============================================================================
|
|
57
|
+
# Environment — what the model trains on
|
|
58
|
+
# IMPORTANT: Use [[env]] (double brackets), NOT [env]
|
|
59
|
+
# =============================================================================
|
|
60
|
+
[[env]]
|
|
61
|
+
id = "primeintellect/alphabet-sort" # Environment ID (owner/name)
|
|
62
|
+
args = { min_turns = 3, max_turns = 5 } # Environment-specific arguments
|
|
63
|
+
|
|
64
|
+
# For multi-environment training, add more [[env]] blocks:
|
|
65
|
+
# [[env]]
|
|
66
|
+
# id = "primeintellect/gsm8k"
|
|
67
|
+
# weight = 0.3
|
|
68
|
+
|
|
69
|
+
# =============================================================================
|
|
70
|
+
# Weights & Biases logging (optional but recommended)
|
|
71
|
+
# =============================================================================
|
|
72
|
+
[wandb]
|
|
73
|
+
project = "prime-rl-training"
|
|
74
|
+
name = "qwen3-4b-alphabet-sort"
|
|
75
|
+
# entity = "my-team" # Optional: W&B team/org
|
|
76
|
+
|
|
77
|
+
# =============================================================================
|
|
78
|
+
# Evaluation — periodic eval during training
|
|
79
|
+
# =============================================================================
|
|
80
|
+
[eval]
|
|
81
|
+
interval = 50 # Eval every N steps
|
|
82
|
+
n_samples = 100 # Number of eval examples
|