understanding-prime-env 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +76 -38
- package/package.json +1 -1
- package/skills/understand-prime-env/SKILL.md +252 -93
package/bin/install.js
CHANGED
|
@@ -5,13 +5,12 @@
|
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const path = require('path');
|
|
7
7
|
const os = require('os');
|
|
8
|
-
const readline = require('readline');
|
|
9
8
|
|
|
10
9
|
const SKILL_NAME = 'understand-prime-env';
|
|
11
10
|
const PACKAGE_ROOT = path.join(__dirname, '..');
|
|
12
11
|
const SKILL_MD_PATH = path.join(PACKAGE_ROOT, 'skills', SKILL_NAME, 'SKILL.md');
|
|
13
12
|
|
|
14
|
-
// ── helpers
|
|
13
|
+
// ── helpers ───────────────────────────────────────────────────────────────────
|
|
15
14
|
|
|
16
15
|
function readSkillRaw() {
|
|
17
16
|
return fs.readFileSync(SKILL_MD_PATH, 'utf8');
|
|
@@ -37,9 +36,64 @@ function appendOrCreate(filePath, section) {
|
|
|
37
36
|
|
|
38
37
|
function ok(msg) { console.log('\x1b[32m✓\x1b[0m ' + msg); }
|
|
39
38
|
function info(msg) { console.log('\x1b[36mℹ\x1b[0m ' + msg); }
|
|
40
|
-
function warn(msg) { console.log('\x1b[33m⚠\x1b[0m ' + msg); }
|
|
41
39
|
function fail(msg) { console.error('\x1b[31m✗\x1b[0m ' + msg); process.exit(1); }
|
|
42
40
|
|
|
41
|
+
// ── arrow-key selector ────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
function select(question, choices) {
|
|
44
|
+
return new Promise((resolve) => {
|
|
45
|
+
let cursor = 0;
|
|
46
|
+
|
|
47
|
+
const RESET = '\x1b[0m';
|
|
48
|
+
const BOLD = '\x1b[1m';
|
|
49
|
+
const ACCENT = '\x1b[35m'; // purple
|
|
50
|
+
const DIM = '\x1b[2m';
|
|
51
|
+
const UP = '\x1b[1A';
|
|
52
|
+
const CLEAR = '\x1b[2K\r';
|
|
53
|
+
|
|
54
|
+
function render(first) {
|
|
55
|
+
if (!first) {
|
|
56
|
+
// move up past all choices + question line
|
|
57
|
+
process.stdout.write(UP.repeat(choices.length + 1));
|
|
58
|
+
}
|
|
59
|
+
process.stdout.write(`${CLEAR}${BOLD}${question}${RESET}\n`);
|
|
60
|
+
choices.forEach((c, i) => {
|
|
61
|
+
const active = i === cursor;
|
|
62
|
+
const pointer = active ? `${ACCENT}❯${RESET}` : ' ';
|
|
63
|
+
const label = active ? `${BOLD}${c}${RESET}` : `${DIM}${c}${RESET}`;
|
|
64
|
+
process.stdout.write(`${CLEAR} ${pointer} ${label}\n`);
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
render(true);
|
|
69
|
+
|
|
70
|
+
process.stdin.setRawMode(true);
|
|
71
|
+
process.stdin.resume();
|
|
72
|
+
process.stdin.setEncoding('utf8');
|
|
73
|
+
|
|
74
|
+
function cleanup() {
|
|
75
|
+
process.stdin.setRawMode(false);
|
|
76
|
+
process.stdin.pause();
|
|
77
|
+
process.stdin.removeListener('data', onKey);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
function onKey(key) {
|
|
81
|
+
if (key === '\u0003') { cleanup(); process.exit(0); } // Ctrl+C
|
|
82
|
+
if (key === '\u001b[A' || key === 'k') cursor = (cursor - 1 + choices.length) % choices.length; // up
|
|
83
|
+
if (key === '\u001b[B' || key === 'j') cursor = (cursor + 1) % choices.length; // down
|
|
84
|
+
if (key === '\r' || key === '\n') {
|
|
85
|
+
cleanup();
|
|
86
|
+
process.stdout.write('\n');
|
|
87
|
+
resolve(cursor);
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
render(false);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
process.stdin.on('data', onKey);
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
|
|
43
97
|
// ── installers ────────────────────────────────────────────────────────────────
|
|
44
98
|
|
|
45
99
|
function installClaude(isGlobal) {
|
|
@@ -62,10 +116,9 @@ function installCursor() {
|
|
|
62
116
|
const dest = path.join(process.cwd(), '.cursor', 'rules');
|
|
63
117
|
ensureDir(dest);
|
|
64
118
|
const outPath = path.join(dest, `${SKILL_NAME}.mdc`);
|
|
65
|
-
// Cursor MDC format: YAML front-matter + markdown body
|
|
66
119
|
const mdc = [
|
|
67
120
|
'---',
|
|
68
|
-
|
|
121
|
+
'description: understand-prime-env — generate HTML overview for a Prime Intellect verifiers environment',
|
|
69
122
|
'globs:',
|
|
70
123
|
' - "**/*.py"',
|
|
71
124
|
'alwaysApply: false',
|
|
@@ -81,7 +134,7 @@ function installCursor() {
|
|
|
81
134
|
function installWindsurf() {
|
|
82
135
|
const body = readSkillBody();
|
|
83
136
|
const outPath = path.join(process.cwd(), '.windsurfrules');
|
|
84
|
-
const section = `# understand-
|
|
137
|
+
const section = `# understand-prime-env\n\n${body}`;
|
|
85
138
|
appendOrCreate(outPath, section);
|
|
86
139
|
ok(`Windsurf → ${outPath}`);
|
|
87
140
|
}
|
|
@@ -91,7 +144,7 @@ function installCopilot() {
|
|
|
91
144
|
const dir = path.join(process.cwd(), '.github');
|
|
92
145
|
ensureDir(dir);
|
|
93
146
|
const outPath = path.join(dir, 'copilot-instructions.md');
|
|
94
|
-
const section = `# understand-
|
|
147
|
+
const section = `# understand-prime-env\n\n${body}`;
|
|
95
148
|
appendOrCreate(outPath, section);
|
|
96
149
|
ok(`GitHub Copilot → ${outPath}`);
|
|
97
150
|
}
|
|
@@ -111,7 +164,7 @@ function installZed() {
|
|
|
111
164
|
const separator = existing ? '\n\n---\n\n' : '';
|
|
112
165
|
settings.assistant = settings.assistant ?? {};
|
|
113
166
|
settings.assistant.default_context = settings.assistant.default_context ?? {};
|
|
114
|
-
settings.assistant.default_context.custom_instructions = existing + separator + `# understand-
|
|
167
|
+
settings.assistant.default_context.custom_instructions = existing + separator + `# understand-prime-env\n\n${body}`;
|
|
115
168
|
|
|
116
169
|
fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
|
|
117
170
|
ok(`Zed → ${settingsPath}`);
|
|
@@ -128,16 +181,16 @@ function installAll(isGlobal) {
|
|
|
128
181
|
// ── CLI ───────────────────────────────────────────────────────────────────────
|
|
129
182
|
|
|
130
183
|
const TOOLS = {
|
|
131
|
-
claude: { label: 'Claude Code',
|
|
132
|
-
cursor: { label: 'Cursor',
|
|
133
|
-
windsurf: { label: 'Windsurf',
|
|
134
|
-
copilot: { label: 'GitHub Copilot',
|
|
135
|
-
zed: { label: 'Zed',
|
|
136
|
-
all: { label: 'All of the above',fn: (g) => installAll(g) },
|
|
184
|
+
claude: { label: 'Claude Code', fn: (g) => installClaude(g) },
|
|
185
|
+
cursor: { label: 'Cursor', fn: () => installCursor() },
|
|
186
|
+
windsurf: { label: 'Windsurf', fn: () => installWindsurf() },
|
|
187
|
+
copilot: { label: 'GitHub Copilot', fn: () => installCopilot() },
|
|
188
|
+
zed: { label: 'Zed', fn: () => installZed() },
|
|
189
|
+
all: { label: 'All of the above', fn: (g) => installAll(g) },
|
|
137
190
|
};
|
|
138
191
|
|
|
139
192
|
const HELP = `
|
|
140
|
-
understand-
|
|
193
|
+
understand-prime-env installer
|
|
141
194
|
Usage: npx understanding-prime-env [tool] [options]
|
|
142
195
|
|
|
143
196
|
Tools (optional — omit for interactive prompt):
|
|
@@ -167,21 +220,8 @@ function parseArgs() {
|
|
|
167
220
|
return { tool, isGlobal };
|
|
168
221
|
}
|
|
169
222
|
|
|
170
|
-
async function prompt(question, choices) {
|
|
171
|
-
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
172
|
-
console.log('\n' + question);
|
|
173
|
-
choices.forEach((c, i) => console.log(` ${i + 1}) ${c}`));
|
|
174
|
-
return new Promise((resolve) => {
|
|
175
|
-
rl.question('\nEnter number: ', (answer) => {
|
|
176
|
-
rl.close();
|
|
177
|
-
const idx = parseInt(answer, 10) - 1;
|
|
178
|
-
resolve(idx >= 0 && idx < choices.length ? idx : -1);
|
|
179
|
-
});
|
|
180
|
-
});
|
|
181
|
-
}
|
|
182
|
-
|
|
183
223
|
async function main() {
|
|
184
|
-
console.log('\n\x1b[1munderstand-
|
|
224
|
+
console.log('\n\x1b[1munderstand-prime-env\x1b[0m · Prime Intellect verifiers skill\n');
|
|
185
225
|
|
|
186
226
|
const { tool, isGlobal } = parseArgs();
|
|
187
227
|
|
|
@@ -190,20 +230,18 @@ async function main() {
|
|
|
190
230
|
return;
|
|
191
231
|
}
|
|
192
232
|
|
|
193
|
-
|
|
194
|
-
const
|
|
195
|
-
const labels = keys.map(k => TOOLS[k].label);
|
|
196
|
-
const idx = await prompt('Which editor / CLI tool do you want to install for?', labels);
|
|
197
|
-
if (idx === -1) { fail('Invalid selection.'); }
|
|
233
|
+
const keys = Object.keys(TOOLS);
|
|
234
|
+
const labels = keys.map(k => TOOLS[k].label);
|
|
198
235
|
|
|
236
|
+
const idx = await select('Which editor / CLI tool do you want to install for?', labels);
|
|
199
237
|
const chosen = keys[idx];
|
|
200
238
|
let global = isGlobal;
|
|
201
239
|
|
|
202
240
|
if (chosen === 'claude' || chosen === 'all') {
|
|
203
|
-
const scopeIdx = await
|
|
204
|
-
'
|
|
205
|
-
|
|
206
|
-
);
|
|
241
|
+
const scopeIdx = await select('Install scope for Claude Code:', [
|
|
242
|
+
'Global (~/.claude/skills/ — available everywhere)',
|
|
243
|
+
'Local (.claude/skills/ — this project only)',
|
|
244
|
+
]);
|
|
207
245
|
global = scopeIdx === 0;
|
|
208
246
|
}
|
|
209
247
|
|
package/package.json
CHANGED
(+1 -1 — hunk not captured in this extract)
package/skills/understand-prime-env/SKILL.md
CHANGED
|
@@ -1,156 +1,312 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: understand-prime-env
|
|
3
|
-
description: Generate a rich, self-contained HTML report that fully explains a Prime Intellect verifiers environment. Use this skill any time the user asks to understand, explain, document, visualize, or explore a verifiers environment — even if they just say "what does this environment do?", "explain this env", "give me an overview", or "generate an HTML for this environment". The skill reads the Python source files in the current directory, extracts the dataset, reward functions, rollout logic, and
|
|
3
|
+
description: Generate a rich, self-contained HTML report that fully explains a Prime Intellect verifiers environment. Use this skill any time the user asks to understand, explain, document, visualize, or explore a verifiers environment — even if they just say "what does this environment do?", "explain this env", "give me an overview", or "generate an HTML for this environment". The skill reads the Python source files in the current directory, extracts the raw dataset, reward functions, and rollout logic, and writes a visually stunning gamified HTML file to the environment folder.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# Understand Environment
|
|
6
|
+
# Understand Prime Environment
|
|
7
7
|
|
|
8
8
|
## Goal
|
|
9
9
|
|
|
10
|
-
Produce a single self-contained HTML file (`environment_overview.html`) that gives a
|
|
10
|
+
Produce a single self-contained HTML file (`environment_overview.html`) that gives a researcher — someone who already knows RL and verifiers but has never seen *this* environment — a complete deep understanding in one page. The output should make them stop and say "whoa."
|
|
11
11
|
|
|
12
|
-
The
|
|
12
|
+
The page has **4 cards**, rendered in a dark gamified UI. Each card is a deep-dive, not a summary.
|
|
13
13
|
|
|
14
14
|
---
|
|
15
15
|
|
|
16
16
|
## Step 1 — Read the source
|
|
17
17
|
|
|
18
|
-
Read **every `.py` file** in the current directory. Also read `pyproject.toml` and `README.md` if they exist. Do not skip helper files — reward logic is often split across modules (e.g. `*_checks.py`, `*_prompts.py`).
|
|
18
|
+
Read **every `.py` file** in the current directory. Also read `pyproject.toml` and `README.md` if they exist. Do not skip helper files — reward logic is often split across modules (e.g. `*_checks.py`, `*_prompts.py`). Read all of them before writing a single line of HTML.
|
|
19
19
|
|
|
20
|
-
Extract
|
|
20
|
+
Extract exactly four things:
|
|
21
21
|
|
|
22
|
-
###
|
|
23
|
-
-
|
|
24
|
-
-
|
|
25
|
-
-
|
|
22
|
+
### A. Environment identity
|
|
23
|
+
- The environment's name and a one-paragraph description of the task it trains a model to do
|
|
24
|
+
- The GitHub repo URL if present in any source file or README (e.g. `https://github.com/PrimeIntellect-ai/verifiers`)
|
|
25
|
+
- 3–5 quick stats: e.g. dataset size, number of reward functions, number of turns, task type
|
|
26
26
|
|
|
27
|
-
###
|
|
28
|
-
-
|
|
29
|
-
-
|
|
27
|
+
### B. Raw dataset — the input data itself
|
|
28
|
+
- Where does the data come from? (HuggingFace dataset name + split, a hardcoded `PROMPTS` list, a generator function, etc.)
|
|
29
|
+
- What fields does a single row have? List every field name and its type/purpose
|
|
30
|
+
- Show **one complete real example row** — every field, real values, not truncated. If real data is not available locally, synthesize one example that is indistinguishable from a real row (match field names, value formats, constraints exactly)
|
|
30
31
|
|
|
31
|
-
###
|
|
32
|
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
32
|
+
### C. Reward functions — the actual logic
|
|
33
|
+
For each reward function (`@vf.reward`, functions passed to `Rubric`, reward methods on `Taskset`):
|
|
34
|
+
- Its name
|
|
35
|
+
- What it is actually checking — not a summary, the real logic: what string patterns, what conditions, what regex, what comparisons
|
|
36
|
+
- Exactly what makes it return **0** (failure) vs **1** (full score) — and any partial scores in between
|
|
37
|
+
- Any thresholds, edge cases, or gotchas a model writer would need to know
|
|
38
|
+
- If a judge LLM is used: the model name, what the judge prompt asks, and what it returns
|
|
39
|
+
|
|
40
|
+
If multiple rewards combine into a final score, extract the exact formula.
|
|
41
|
+
|
|
42
|
+
### D. Theoretical rollout — what would happen
|
|
43
|
+
Write a step-by-step narrative trace of what would happen if you ran one example end-to-end through this environment:
|
|
44
|
+
1. How the raw dataset row gets transformed into the actual prompt the model sees (system prompt + user message, any templating)
|
|
45
|
+
2. What the model is expected to produce (format, length, structure)
|
|
46
|
+
3. If there are tools or a sandbox: what tools, how they'd be called
|
|
47
|
+
4. How each reward function gets called on the model output — in what order, with what inputs
|
|
48
|
+
5. How the final score is computed from the individual reward outputs
|
|
49
|
+
6. What a **perfect response** looks like vs a **zero-score response**
|
|
50
|
+
|
|
51
|
+
Be specific. Trace through the actual code logic. This is theoretical (not executed) but must be grounded in what the code actually does.
|
|
35
52
|
|
|
36
53
|
---
|
|
37
54
|
|
|
38
55
|
## Step 2 — Generate the HTML
|
|
39
56
|
|
|
40
|
-
Write a single self-contained HTML file to `./environment_overview.html`. No external CDN dependencies — all CSS and
|
|
57
|
+
Write a single **self-contained** HTML file to `./environment_overview.html`. No external CDN dependencies — all CSS, JS, and assets inline.
|
|
41
58
|
|
|
42
|
-
|
|
59
|
+
---
|
|
43
60
|
|
|
44
|
-
|
|
61
|
+
### Visual Direction: Dark Gamified Research Dashboard
|
|
45
62
|
|
|
63
|
+
The aesthetic is a **dark, glowing, gamified dashboard** — like a game HUD for researchers. Think deep space + neon. Each card has its own accent color and glows on hover. The reader should feel like they're exploring a world, not reading a doc.
|
|
64
|
+
|
|
65
|
+
**Color palette:**
|
|
46
66
|
```
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
67
|
+
Page background: #080b14 (near-black, blue-tinted)
|
|
68
|
+
Card background: #0e1420 (dark navy)
|
|
69
|
+
Card border: 1.5px gradient border (unique per card)
|
|
70
|
+
|
|
71
|
+
Card 1 accent — purple: #a855f7 glow: rgba(168,85,247,0.25)
|
|
72
|
+
Card 2 accent — cyan: #22d3ee glow: rgba(34,211,238,0.25)
|
|
73
|
+
Card 3 accent — amber: #f59e0b glow: rgba(245,158,11,0.25)
|
|
74
|
+
Card 4 accent — rose: #f43f5e glow: rgba(244,63,94,0.25)
|
|
75
|
+
|
|
76
|
+
Text primary: #f1f5f9
|
|
77
|
+
Text secondary: #94a3b8
|
|
78
|
+
Text muted: #475569
|
|
79
|
+
Code text: #e2e8f0
|
|
51
80
|
```
|
|
52
81
|
|
|
53
|
-
|
|
82
|
+
**Gradient borders** — each card has a glowing gradient border using this trick:
|
|
83
|
+
```css
|
|
84
|
+
.card {
|
|
85
|
+
position: relative;
|
|
86
|
+
background: #0e1420;
|
|
87
|
+
border-radius: 16px;
|
|
88
|
+
}
|
|
89
|
+
.card::before {
|
|
90
|
+
content: '';
|
|
91
|
+
position: absolute;
|
|
92
|
+
inset: -1.5px;
|
|
93
|
+
border-radius: 17px;
|
|
94
|
+
background: linear-gradient(135deg, var(--card-accent), transparent 60%);
|
|
95
|
+
z-index: -1;
|
|
96
|
+
}
|
|
97
|
+
```
|
|
54
98
|
|
|
55
|
-
|
|
99
|
+
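On hover: `box-shadow: 0 0 40px var(--card-glow), 0 0 80px var(--card-glow)` + `transform: translateY(-4px)`. Transition: `0.3s cubic-bezier(0.34, 1.56, 0.64, 1)` (slight spring). Note: `--card-glow` already holds a complete `rgba(...)` color (see the palette above), so it must be used directly and never wrapped in another `rgba()`.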
|
|
56
100
|
|
|
57
|
-
|
|
101
|
+
**Typography:**
|
|
102
|
+
```css
|
|
103
|
+
font-family: 'SF Pro Display', -apple-system, 'Helvetica Neue', sans-serif; /* body */
|
|
104
|
+
font-family: ui-monospace, 'Cascadia Code', 'Fira Code', monospace; /* code */
|
|
105
|
+
```
|
|
58
106
|
|
|
59
|
-
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
### Page Layout
|
|
60
110
|
|
|
61
111
|
```
|
|
62
|
-
|
|
63
|
-
│ env name (large,
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
│
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
│
|
|
70
|
-
|
|
71
|
-
└─────────────────────────────────────────────┘
|
|
112
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
113
|
+
│ HERO: env name (large, glowing) + tagline + PI logo badge │
|
|
114
|
+
├──────────────┬──────────────┬──────────────┬────────────────┤
|
|
115
|
+
│ CARD 1 │ CARD 2 │ CARD 3 │ CARD 4 │
|
|
116
|
+
│ 🌐 Env │ 📦 Dataset │ ⚡ Rewards │ 🎯 Rollout │
|
|
117
|
+
│ purple │ cyan │ amber │ rose │
|
|
118
|
+
├──────────────┴──────────────┴──────────────┴────────────────┤
|
|
119
|
+
│ FOOTER: generated timestamp · PI branding │
|
|
120
|
+
└─────────────────────────────────────────────────────────────┘
|
|
72
121
|
```
|
|
73
122
|
|
|
74
|
-
|
|
123
|
+
On screens < 1200px: 2-column grid. On mobile: single column.
|
|
124
|
+
|
|
125
|
+
Cards are tall enough to show all content — the page CAN scroll, but each card is self-contained.
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
### Hero Section
|
|
75
130
|
|
|
76
|
-
|
|
77
|
-
- `
|
|
78
|
-
-
|
|
79
|
-
-
|
|
131
|
+
Full-width header above the cards:
|
|
132
|
+
- Large environment name: `font-size: clamp(2rem, 5vw, 4rem)`, `font-weight: 800`, white with a subtle purple text-shadow glow: `text-shadow: 0 0 40px rgba(168,85,247,0.4)`
|
|
133
|
+
- Below: one sentence tagline in `#94a3b8`
|
|
134
|
+
- Top-right: a `⬡ PRIME INTELLECT` badge — `background: rgba(168,85,247,0.1)`, `border: 1px solid rgba(168,85,247,0.3)`, `color: #a855f7`, `border-radius: 6px`, `padding: 4px 12px`, `font-size: 0.7rem`, `letter-spacing: 0.12em`
|
|
135
|
+
- Background: `radial-gradient(ellipse at 30% 0%, rgba(168,85,247,0.12) 0%, transparent 60%), radial-gradient(ellipse at 80% 100%, rgba(34,211,238,0.06) 0%, transparent 50%)`
|
|
136
|
+
- A thin `border-bottom: 1px solid #1e293b` separates the hero from the cards
|
|
80
137
|
|
|
81
|
-
|
|
138
|
+
On page load: env name fades + slides up 16px (`animation: heroIn 0.6s ease-out`). Tagline follows 150ms later.
|
|
82
139
|
|
|
83
|
-
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
### Card 1 — Environment `(purple)`
|
|
143
|
+
|
|
144
|
+
**Header:** `🌐 Environment` in bold white, with a small `ENVIRONMENT` label (`font-size: 0.7rem`, purple, all caps) above it.
|
|
145
|
+
|
|
146
|
+
**Body:**
|
|
147
|
+
- A paragraph of prose describing what task this environment trains a model to do — written in plain English, no jargon beyond what the researcher already knows
|
|
148
|
+
- GitHub link (if found): a pill button — `background: rgba(168,85,247,0.1)`, `border: 1px solid rgba(168,85,247,0.3)`, hover brightens, shows `↗` arrow. If no GitHub link found, omit this element entirely.
|
|
149
|
+
- Stat chips row at the bottom: 3–5 pill badges (e.g. `64 prompts`, `2 rewards`, `single-turn`, `math reasoning`). Each chip: `background: rgba(168,85,247,0.08)`, `border: 1px solid rgba(168,85,247,0.2)`, `color: #c4b5fd`, `border-radius: 99px`, `padding: 3px 10px`, `font-size: 0.72rem`
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
### Card 2 — Dataset `(cyan)`
|
|
84
154
|
|
|
155
|
+
**Header:** `📦 Dataset` label.
|
|
156
|
+
|
|
157
|
+
**Body — three subsections, stacked:**
|
|
158
|
+
|
|
159
|
+
**① Source** — one line, monospace: where the data comes from. E.g.:
|
|
160
|
+
```
|
|
161
|
+
HuggingFace · openai/gsm8k · train split
|
|
85
162
|
```
|
|
86
|
-
|
|
163
|
+
or
|
|
87
164
|
```
|
|
165
|
+
Hardcoded · PROMPTS list · 64 examples
|
|
166
|
+
```
|
|
167
|
+
Style: `background: rgba(34,211,238,0.05)`, `border-left: 3px solid #22d3ee`, `padding: 8px 14px`, `border-radius: 0 6px 6px 0`, monospace, cyan text.
|
|
168
|
+
|
|
169
|
+
**② Field anatomy** — a compact table showing every field in a data row:
|
|
170
|
+
|
|
171
|
+
| Field | Type | Description |
|
|
172
|
+
|-------|------|-------------|
|
|
88
173
|
|
|
89
|
-
|
|
90
|
-
- `background: var(--bg-card)`, `border: 1.5px solid var(--border)`, `border-radius: 8px`, `padding: 10px 16px`
|
|
91
|
-
- **Bold label** (2–4 words) on top
|
|
92
|
-
- One-line description beneath in muted text, `font-size: 0.8rem`
|
|
93
|
-
- On hover: `border-color: var(--accent)`
|
|
174
|
+
Table style: no outer border, alternating row backgrounds `rgba(34,211,238,0.03)` / transparent, header row in cyan `0.6rem` caps. Text `0.82rem`.
|
|
94
175
|
|
|
95
|
-
|
|
176
|
+
**③ Example row** — the most important part. Show one complete real (or synthesized) example row. Render it as a structured display, NOT a raw JSON dump:
|
|
177
|
+
- Each field on its own line: field name in cyan monospace, value in white
|
|
178
|
+
- Long text values (like prompt content) get a soft box: `background: rgba(255,255,255,0.03)`, `border: 1px solid #1e293b`, `border-radius: 6px`, `padding: 10px 14px`, `font-size: 0.82rem`, full content shown (no truncation)
|
|
96
179
|
|
|
97
|
-
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
### Card 3 — Rewards `(amber)`
|
|
98
183
|
|
|
99
|
-
|
|
184
|
+
**Header:** `⚡ Rewards` label.
|
|
100
185
|
|
|
101
|
-
|
|
186
|
+
**Body:** For each reward function, a reward block:
|
|
102
187
|
|
|
103
188
|
```
|
|
104
|
-
|
|
105
|
-
|
|
189
|
+
┌─────────────────────────────────────────────────┐
|
|
190
|
+
│ format_reward [float] │
|
|
191
|
+
│ ───────────────────────────────────────────── │
|
|
192
|
+
│ CHECKS │
|
|
193
|
+
│ Checks that response contains <answer>...</ │
|
|
194
|
+
│ answer> tags and inner content is numeric │
|
|
195
|
+
│ │
|
|
196
|
+
│ SCORES 0 if tags missing or content non-num │
|
|
197
|
+
│ SCORES 1 if tags present and content is int │
|
|
198
|
+
└─────────────────────────────────────────────────┘
|
|
106
199
|
```
|
|
107
200
|
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
201
|
+
- Function name: `font-family: monospace`, amber color, `font-size: 0.9rem`, `font-weight: 600`
|
|
202
|
+
- `[float]` / `[int]` / `[bool]` type badge: small, muted, right-aligned
|
|
203
|
+
- `CHECKS` / `SCORES 0` / `SCORES 1` labels: `0.65rem`, letter-spacing `0.1em`, amber at 60% opacity
|
|
204
|
+
- Actual descriptions: white text, `font-size: 0.83rem`, `line-height: 1.5`
|
|
205
|
+
- Block background: `rgba(245,158,11,0.04)`, `border: 1px solid rgba(245,158,11,0.15)`, `border-radius: 10px`, `padding: 14px 16px`
|
|
206
|
+
- Blocks separated by `12px` gap
|
|
111
207
|
|
|
112
|
-
If there is a composite formula, show it
|
|
208
|
+
If there is a composite formula, show it after all blocks in a prominent callout:
|
|
113
209
|
```
|
|
114
|
-
background:
|
|
115
|
-
border: 1px solid
|
|
116
|
-
border-radius:
|
|
117
|
-
padding:
|
|
210
|
+
background: rgba(245,158,11,0.08)
|
|
211
|
+
border: 1px solid rgba(245,158,11,0.3)
|
|
212
|
+
border-radius: 8px
|
|
213
|
+
padding: 14px 18px
|
|
118
214
|
font-family: monospace
|
|
119
|
-
color:
|
|
215
|
+
color: #fcd34d
|
|
216
|
+
font-size: 0.9rem
|
|
120
217
|
```
|
|
121
218
|
|
|
122
|
-
|
|
219
|
+
---
|
|
123
220
|
|
|
124
|
-
###
|
|
221
|
+
### Card 4 — Rollout `(rose)`
|
|
125
222
|
|
|
126
|
-
|
|
223
|
+
**Header:** `🎯 Rollout` label.
|
|
127
224
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
225
|
+
**Body:** A numbered step-by-step trace. Each step:
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
① Data → Prompt
|
|
229
|
+
──────────────────────────────────────────
|
|
230
|
+
The raw row's `problem` field is inserted
|
|
231
|
+
into a system prompt: "Solve the following
|
|
232
|
+
math problem step by step..." followed by
|
|
233
|
+
the problem text as the user message.
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
- Step number: large, `font-size: 1.4rem`, `font-weight: 800`, rose color, `opacity: 0.4`, positioned to the left
|
|
237
|
+
- Step title: `font-weight: 700`, white, `font-size: 0.9rem`
|
|
238
|
+
- A `1px solid rgba(244,63,94,0.15)` rule below the title
|
|
239
|
+
- Description: `0.83rem`, `#94a3b8`, `line-height: 1.6`
|
|
240
|
+
- Between steps: a rose-tinted connector line on the left side (`border-left: 2px solid rgba(244,63,94,0.15)`, `margin-left: 10px`, `padding-left: 20px`)
|
|
241
|
+
|
|
242
|
+
Steps to always include (adapt to what's in the code):
|
|
243
|
+
1. **Data → Prompt** — how the raw row becomes the model's input
|
|
244
|
+
2. **Model response** — what the model is expected to produce (format, structure)
|
|
245
|
+
3. **Reward evaluation** — how each reward function is called and what it receives
|
|
246
|
+
4. **Score computation** — how the final score is derived
|
|
247
|
+
5. **Perfect vs zero** — what a max-score response looks like vs a zero-score response (concrete examples if possible)
|
|
248
|
+
|
|
249
|
+
---
|
|
250
|
+
|
|
251
|
+
### Entrance Animations
|
|
252
|
+
|
|
253
|
+
All guarded by `@media (prefers-reduced-motion: reduce) { *, *::before, *::after { animation: none !important; } }`.
|
|
254
|
+
|
|
255
|
+
```css
|
|
256
|
+
@keyframes heroIn {
|
|
257
|
+
from { opacity: 0; transform: translateY(16px); }
|
|
258
|
+
to { opacity: 1; transform: translateY(0); }
|
|
259
|
+
}
|
|
260
|
+
@keyframes cardIn {
|
|
261
|
+
from { opacity: 0; transform: translateY(24px) scale(0.98); }
|
|
262
|
+
to { opacity: 1; transform: translateY(0) scale(1); }
|
|
263
|
+
}
|
|
138
264
|
```
|
|
139
265
|
|
|
140
|
-
|
|
266
|
+
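Cards animate in with a staggered delay: give cards 1–4 an `animation-delay` of `0.1s`, `0.2s`, `0.3s`, and `0.4s` respectively (one value per card, not a single comma-separated declaration). Use `animation-fill-mode: both` so each card stays hidden until its delay has elapsed.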
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
### JavaScript (inline, vanilla)
|
|
141
271
|
|
|
142
272
|
```js
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
273
|
+
// Card hover glow
|
|
274
|
+
document.querySelectorAll('.card').forEach(card => {
|
|
275
|
+
card.addEventListener('mousemove', (e) => {
|
|
276
|
+
const rect = card.getBoundingClientRect();
|
|
277
|
+
const x = ((e.clientX - rect.left) / rect.width) * 100;
|
|
278
|
+
const y = ((e.clientY - rect.top) / rect.height) * 100;
|
|
279
|
+
card.style.setProperty('--mouse-x', x + '%');
|
|
280
|
+
card.style.setProperty('--mouse-y', y + '%');
|
|
149
281
|
});
|
|
150
282
|
});
|
|
151
283
|
```
|
|
152
284
|
|
|
153
|
-
|
|
285
|
+
Add a radial gradient spotlight that follows the mouse inside each card:
|
|
286
|
+
```css
|
|
287
|
+
.card::after {
|
|
288
|
+
content: '';
|
|
289
|
+
position: absolute;
|
|
290
|
+
inset: 0;
|
|
291
|
+
border-radius: 16px;
|
|
292
|
+
background: radial-gradient(
|
|
293
|
+
circle at var(--mouse-x, 50%) var(--mouse-y, 50%),
|
|
294
|
+
rgba(255,255,255,0.03) 0%,
|
|
295
|
+
transparent 60%
|
|
296
|
+
);
|
|
297
|
+
pointer-events: none;
|
|
298
|
+
}
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
---
|
|
302
|
+
|
|
303
|
+
### Footer
|
|
304
|
+
|
|
305
|
+
```
|
|
306
|
+
Generated by Claude · Prime Intellect Verifiers · <ISO timestamp>
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
Centered, `color: #334155`, `font-size: 0.72rem`. `border-top: 1px solid #1e293b`, `padding: 24px`. No links, no extra content.
|
|
154
310
|
|
|
155
311
|
---
|
|
156
312
|
|
|
@@ -158,12 +314,15 @@ Active tab style: `border-bottom: 2px solid var(--accent)`, accent color text. I
|
|
|
158
314
|
|
|
159
315
|
After writing the file, tell the user:
|
|
160
316
|
- The full path to the generated file and the command to open it: `open environment_overview.html`
|
|
161
|
-
- Two sentences: what the environment
|
|
317
|
+
- Two sentences: what the environment trains and how it scores
|
|
162
318
|
|
|
163
319
|
## Anti-patterns
|
|
164
320
|
|
|
165
|
-
- Do not
|
|
166
|
-
- Do not
|
|
167
|
-
- Do not
|
|
168
|
-
- Do not
|
|
169
|
-
-
|
|
321
|
+
- Do not produce a surface-level summary — every section must contain the actual logic from the code
|
|
322
|
+
- Do not hallucinate reward weights, field names, dataset contents, or GitHub URLs not found in the source
|
|
323
|
+
- Do not skip helper modules — they often contain the core reward logic
|
|
324
|
+
- Do not truncate the example row — show every field in full
|
|
325
|
+
- Do not use light theme — this is dark-only
|
|
326
|
+
- Do not add tabs, collapsible sections, score bars, or copy buttons — the 4-card layout is the whole structure
|
|
327
|
+
- Do not use Inter, Roboto, or any Google Font
|
|
328
|
+
- If a GitHub URL is not found in the source, omit the GitHub button entirely — never invent a URL
|