@kernel.chat/kbot 3.97.4 → 3.98.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ // Training data curator — reads teacher traces and ~/.kbot/observer/session.jsonl,
2
+ // scores + filters examples, emits a clean JSONL ready for fine-tuning.
3
+ //
4
+ // Scoring signals:
5
+ // + response length is reasonable (200–8000 characters)
6
+ // + contained a thinking block (for reasoning distill)
7
+ // + tool calls had no error results
8
+ // + outcome.verified === true (if tagged)
9
+ // − user retried or gave negative feedback
10
+ // − response contained "I don't know" / "I can't help"
11
+ // − near-duplicate of another example (hash-based)
12
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, appendFileSync, readdirSync, statSync } from 'node:fs';
13
+ import { join, resolve } from 'node:path';
14
+ import { homedir } from 'node:os';
15
+ import { createHash } from 'node:crypto';
16
/**
 * Phrases that mark a response as a refusal or a low-effort hedge.
 * All patterns are case-insensitive and carry no `g` flag, so `.test()` is
 * stateless and the array can be shared freely.
 *
 * Contractions accept both the straight (') and the typographic (’)
 * apostrophe: model output frequently uses the latter, and "I don’t know"
 * must be penalised exactly like "I don't know".
 */
const NEG_PATTERNS = [
    /\bi (don['’]?t|do not) (know|have that|have access)\b/i,
    /\bi (can['’]?t|cannot) (help|assist|do)\b/i,
    /\bas an ai\b/i,
    /\bi['’]?m sorry,? but\b/i,
    /\bi apologi[sz]e\b/i,
];
23
/**
 * Build a short fingerprint used for near-duplicate detection.
 *
 * Only the first 2000 characters are considered. They are lower-cased,
 * whitespace runs are collapsed to a single space and the ends trimmed, then
 * hashed with SHA-256; the first 16 hex characters of the digest are returned.
 */
function hashText(text) {
    const head = text.slice(0, 2000);
    const normalized = head.toLowerCase().replace(/\s+/g, ' ').trim();
    const digest = createHash('sha256').update(normalized).digest('hex');
    return digest.slice(0, 16);
}
26
/**
 * Score a normalized example for training usefulness.
 *
 * Starts at 0.5 and applies additive bonuses/penalties for response length,
 * refusal-like wording, mode-specific content (thinking, tool calls, code),
 * outcome tags and prompt length. Every adjustment that fires records a
 * reason tag.
 *
 * @param ex   Example with `messages` plus optional `thinking`, `tool_calls`, `outcome`.
 * @param mode Curation mode; 'reasoning', 'agent-trace' and 'code-only' add extra signals.
 * @returns `{ score, reasons }` with the score clamped to [0, 1].
 */
function scoreExample(ex, mode) {
    const reasons = [];
    let score = 0.5;
    // Apply one signal: shift the score and remember why.
    const apply = (delta, reason) => {
        score += delta;
        reasons.push(reason);
    };
    // Content of the most recent message with the given role ('' if none).
    const latestContent = (role) => {
        for (let i = ex.messages.length - 1; i >= 0; i--) {
            if (ex.messages[i].role === role)
                return ex.messages[i].content || '';
        }
        return '';
    };
    const answer = latestContent('assistant');
    const question = latestContent('user');
    const answerLen = answer.length;

    // Length signal (measured in characters)
    if (answerLen >= 200 && answerLen <= 8000)
        apply(0.15, 'good_length');
    if (answerLen < 80)
        apply(-0.3, 'too_short');
    if (answerLen > 16000)
        apply(-0.15, 'too_long');

    // Refusal / low-effort signal: penalised once, however many patterns match
    if (NEG_PATTERNS.some(pattern => pattern.test(answer)))
        apply(-0.25, 'refusal_like');

    // Mode-specific scoring
    switch (mode) {
        case 'reasoning':
            if (ex.thinking && ex.thinking.length > 100)
                apply(0.25, 'has_thinking');
            else
                apply(-0.2, 'no_thinking');
            break;
        case 'agent-trace':
            if (ex.tool_calls && ex.tool_calls.length > 0)
                apply(0.25, 'has_tool_calls');
            else
                apply(-0.3, 'no_tool_calls');
            break;
        case 'code-only':
            if (/```[a-zA-Z]+/.test(answer))
                apply(0.15, 'has_code_block');
            if (/\b(function|class|import|const|def|fn )\b/.test(answer))
                apply(0.1, 'code_keywords');
            break;
    }

    // Outcome tags attached to the example
    const signal = ex.outcome?.signal;
    if (ex.outcome?.verified)
        apply(0.3, 'verified');
    if (signal === 'user_retry')
        apply(-0.4, 'user_retried');
    if (signal === 'build_pass' || signal === 'test_pass')
        apply(0.35, signal);

    // Interaction quality
    if (question.length > 20 && question.length < 4000)
        apply(0.05, 'good_prompt_len');

    const clamped = Math.max(0, Math.min(1, score));
    return { score: clamped, reasons };
}
104
/**
 * Parse a single JSONL line from the teacher directory into a normalized Example.
 *
 * Two record shapes are accepted:
 *  - trace records: `{ messages, response: { content, thinking?, tool_calls? }, ... }`;
 *    the response is appended to `messages` as the final assistant turn.
 *  - chat records: `{ messages: [..., { role: 'assistant', content }], ... }` with no
 *    `response` field. This is the shape train-cycle appends to corrections.jsonl;
 *    without this branch every such record would be discarded.
 *
 * @param line One line of JSONL text.
 * @returns The normalized example, or `null` when the line is not valid JSON or
 *          does not describe a usable conversation.
 */
function parseTeacherLine(line) {
    let t;
    try {
        t = JSON.parse(line);
    }
    catch {
        return null;
    }
    // JSON.parse also accepts bare null / numbers / strings, which are not records.
    if (t === null || typeof t !== 'object')
        return null;
    const messages = Array.isArray(t.messages) ? t.messages : [];
    if (messages.length === 0)
        return null;
    const response = t.response;
    let conversation;
    let thinking;
    let toolCalls;
    if (response?.content) {
        // Trace record: the teacher's answer lives beside the prompt messages.
        conversation = [...messages, { role: 'assistant', content: response.content }];
        thinking = response.thinking;
        toolCalls = response.tool_calls;
    }
    else {
        // Chat record: only usable when it already ends with a non-empty
        // assistant turn and contains at least one user turn.
        const last = messages[messages.length - 1];
        const endsWithAnswer = last?.role === 'assistant'
            && typeof last.content === 'string'
            && last.content.length > 0;
        if (!endsWithAnswer || !messages.some(m => m?.role === 'user'))
            return null;
        conversation = messages;
        thinking = t.thinking;
        toolCalls = t.tool_calls;
    }
    return {
        messages: conversation,
        thinking,
        tool_calls: toolCalls,
        outcome: t.outcome,
        meta: { source: 'teacher', provider: t.provider, model: t.model, ts: t.ts },
    };
}
124
/**
 * Parse one line of ~/.kbot/observer/session.jsonl into a two-turn example.
 *
 * The record shape is not fixed, so several field names are tried: the prompt
 * is the first truthy value among input/user/prompt/query and the reply the
 * first truthy value among output/assistant/response/answer. Both must be
 * non-empty strings; anything else (including unparseable JSON) yields `null`.
 */
function parseObserverLine(line) {
    // First truthy value among the candidate keys, or undefined.
    const firstTruthy = (record, keys) => {
        for (const key of keys) {
            if (record[key])
                return record[key];
        }
        return undefined;
    };
    try {
        const record = JSON.parse(line);
        const question = firstTruthy(record, ['input', 'user', 'prompt', 'query']);
        const reply = firstTruthy(record, ['output', 'assistant', 'response', 'answer']);
        const bothText = typeof question === 'string' && typeof reply === 'string';
        if (!bothText)
            return null;
        const messages = [
            { role: 'user', content: question },
            { role: 'assistant', content: reply },
        ];
        return { messages, meta: { source: 'observer', ts: record.ts } };
    }
    catch {
        // Invalid JSON, or a bare `null` record whose fields cannot be read.
        return null;
    }
}
145
/**
 * Flatten a message `content` value to plain text.
 *
 * Strings are returned unchanged. Arrays are treated as content blocks: each
 * object block that has a `text` property contributes that text (falsy text
 * becomes ''), every other element contributes '', and the pieces are joined
 * with newlines. Any other input yields ''.
 */
function extractContent(raw) {
    if (typeof raw === 'string')
        return raw;
    if (!Array.isArray(raw))
        return '';
    const pieces = [];
    for (const block of raw) {
        const carriesText = block && typeof block === 'object' && 'text' in block;
        pieces.push(carriesText ? String(block.text || '') : '');
    }
    return pieces.join('\n');
}
153
/**
 * Extract user → assistant pairs from a Claude Code session transcript (JSONL).
 *
 * Lines are scanned in order. A `user` turn whose text is longer than 10
 * characters and does not start with `<tool_result` becomes the pending
 * prompt; the next `assistant` turn with more than 40 characters of text is
 * paired with it and the pending prompt is cleared. Shorter assistant turns
 * leave the prompt pending.
 *
 * @param filePath Path of the session file.
 * @returns Examples tagged with `meta.source = 'claude-code'`; an empty array
 *          when the file cannot be read.
 */
function parseClaudeCodeFile(filePath) {
    let lines;
    try {
        lines = readFileSync(filePath, 'utf-8').split('\n').filter(l => l.trim());
    }
    catch {
        return [];
    }
    const out = [];
    let pendingUser = null;
    for (const line of lines) {
        let turn;
        try {
            turn = JSON.parse(line);
        }
        catch {
            continue;
        }
        // A line such as `null` is valid JSON but not a turn; reading `.type`
        // from it would throw and abort the whole file, so skip it instead.
        if (turn === null || typeof turn !== 'object')
            continue;
        if (turn.type === 'user' && turn.message) {
            const content = extractContent(turn.message.content);
            if (content.length > 10 && !content.startsWith('<tool_result'))
                pendingUser = content;
        }
        else if (turn.type === 'assistant' && turn.message && pendingUser) {
            const content = extractContent(turn.message.content);
            if (content.length > 40) {
                out.push({
                    messages: [
                        { role: 'user', content: pendingUser },
                        { role: 'assistant', content },
                    ],
                    meta: { source: 'claude-code', session: turn.sessionId, file: filePath },
                });
                pendingUser = null;
            }
        }
    }
    return out;
}
192
/**
 * Locate Claude Code session transcripts under ~/.claude/projects.
 *
 * Walks the tree depth-first, skipping dot-entries and `node_modules`, and
 * collects `.jsonl` files larger than 2048 bytes. Unreadable directories and
 * entries are ignored.
 *
 * @param limitFiles Maximum number of paths to return.
 * @param maxDepth   Deepest directory level to descend into (the base is level 0).
 * @returns Paths of the largest files first, at most `limitFiles` of them.
 */
function findClaudeCodeSessions(limitFiles = 40, maxDepth = 4) {
    const root = join(homedir(), '.claude', 'projects');
    if (!existsSync(root))
        return [];
    const found = [];
    const visit = (dir, depth) => {
        if (depth > maxDepth)
            return;
        let names;
        try {
            names = readdirSync(dir);
        }
        catch {
            return;
        }
        for (const name of names) {
            const ignored = name.startsWith('.') || name === 'node_modules';
            if (ignored)
                continue;
            const fullPath = join(dir, name);
            let info;
            try {
                info = statSync(fullPath);
            }
            catch {
                continue;
            }
            if (info.isDirectory())
                visit(fullPath, depth + 1);
            else if (name.endsWith('.jsonl') && info.size > 2048)
                found.push({ path: fullPath, size: info.size });
        }
    };
    try {
        visit(root, 0);
    }
    catch {
        return [];
    }
    // Largest transcripts first, then cap.
    found.sort((a, b) => b.size - a.size);
    return found.slice(0, limitFiles).map(item => item.path);
}
235
/**
 * Build the default list of input files for the curator.
 *
 * Includes whichever of the kbot teacher traces, teacher corrections and
 * observer session files exist under ~/.kbot, followed (unless disabled) by
 * the Claude Code session files found by findClaudeCodeSessions().
 *
 * @param includeClaudeCode Set to false to leave out Claude Code sessions.
 */
function defaultSources(includeClaudeCode = true) {
    const kbotDir = join(homedir(), '.kbot');
    const candidates = [
        join(kbotDir, 'teacher', 'traces.jsonl'),
        join(kbotDir, 'teacher', 'corrections.jsonl'),
        join(kbotDir, 'observer', 'session.jsonl'),
    ];
    const sources = [];
    for (const candidate of candidates) {
        if (existsSync(candidate))
            sources.push(candidate);
    }
    if (includeClaudeCode) {
        for (const session of findClaudeCodeSessions())
            sources.push(session);
    }
    return sources;
}
246
/**
 * Read a UTF-8 text file and return its non-blank lines.
 * Lines are split on '\n' and kept verbatim (no trimming of kept lines);
 * a file that cannot be read yields an empty array.
 */
function readLines(file) {
    let text;
    try {
        text = readFileSync(file, 'utf-8');
    }
    catch {
        return [];
    }
    const kept = [];
    for (const line of text.split('\n')) {
        if (line.trim().length > 0)
            kept.push(line);
    }
    return kept;
}
254
/**
 * Parse one source file into normalized examples, choosing the parser from
 * the file's location.
 *
 * Routing looks at path *segments* rather than raw substrings so that
 *  - Windows paths (backslash separators) are recognised, and
 *  - an unrelated ancestor directory (e.g. a home directory named "teacher")
 *    cannot send an observer log to the teacher parser, which would drop
 *    every line.
 *
 * Rules, in order:
 *  1. anything below a `.claude/projects/` directory → Claude Code transcript;
 *  2. parent directory `teacher`, or a file name containing "teacher" or
 *     "corrections" → teacher records;
 *  3. parent directory `observer`, or a file name containing "observer"
 *     → observer records;
 *  4. otherwise each line is tried as a teacher record, then as an observer record.
 *
 * @param file Path of the source file.
 * @returns The examples that could be parsed (possibly empty).
 */
function parseSource(file) {
    const segments = file.replace(/\\/g, '/').split('/');
    const base = segments[segments.length - 1] || '';
    const parent = segments.length > 1 ? segments[segments.length - 2] : '';
    const underClaudeProjects = segments.some((seg, i) => seg === '.claude' && segments[i + 1] === 'projects' && i + 2 < segments.length);
    if (underClaudeProjects)
        return parseClaudeCodeFile(file);
    const lines = readLines(file);
    if (parent === 'teacher' || base.includes('teacher') || base.includes('corrections'))
        return lines.map(parseTeacherLine).filter((x) => x !== null);
    if (parent === 'observer' || base.includes('observer'))
        return lines.map(parseObserverLine).filter((x) => x !== null);
    // Unknown origin: accept whichever shape each line matches.
    const out = [];
    for (const line of lines) {
        const parsed = parseTeacherLine(line) || parseObserverLine(line);
        if (parsed)
            out.push(parsed);
    }
    return out;
}
270
/**
 * Run the curator end-to-end and write the resulting dataset.
 *
 * Pipeline: parse every source → drop responses outside the length window →
 * drop near-duplicates → score → drop examples below `minScore` → keep the
 * `maxExamples` best → write one JSON record per line to `output`.
 *
 * Options (all optional):
 *  - mode            scoring mode passed to scoreExample (default 'default')
 *  - sources         input files (default: defaultSources())
 *  - output          dataset path (default ~/.kbot/teacher/dataset-<mode>.jsonl)
 *  - maxExamples     cap on kept examples (default 5000)
 *  - minScore        minimum score to keep (default 0.5)
 *  - minResponseLen / maxResponseLen
 *                    allowed length of the last assistant message in
 *                    characters (defaults 80 / 32000)
 *  - dedupe          set to false to keep duplicates (default true)
 *
 * @returns A report: output path, counts examined/kept/rejected/duplicates,
 *          mean score of kept examples (3 decimals) and per-source counts.
 */
export function curate(opts = {}) {
    const mode = opts.mode ?? 'default';
    const sources = opts.sources ?? defaultSources();
    const output = resolve(opts.output ?? join(homedir(), '.kbot', 'teacher', `dataset-${mode}.jsonl`));
    const maxExamples = opts.maxExamples ?? 5000;
    const minScore = opts.minScore ?? 0.5;
    const minResp = opts.minResponseLen ?? 80;
    const maxResp = opts.maxResponseLen ?? 32000;
    const dedupe = opts.dedupe !== false;
    // Parent directory via path resolution instead of searching for '/', which
    // finds nothing in a Windows path and would leave the directory uncreated.
    const outDir = resolve(output, '..');
    if (!existsSync(outDir))
        mkdirSync(outDir, { recursive: true });
    const bySource = {};
    const all = [];
    const seen = new Set();
    let total = 0;
    let duplicates = 0;
    for (const src of sources) {
        const examples = parseSource(src);
        bySource[src] = examples.length;
        for (const ex of examples) {
            total++;
            const lastAssistant = [...ex.messages].reverse().find(m => m.role === 'assistant')?.content || '';
            if (lastAssistant.length < minResp || lastAssistant.length > maxResp)
                continue;
            // hashText only looks at the first 2000 characters. A system prompt
            // shared by many examples would fill that window and make them all
            // look identical, so the dedupe key is built from the conversation
            // turns only.
            const dedupeKey = ex.messages
                .filter(m => m.role !== 'system')
                .map(m => m.content)
                .join('|');
            const hash = hashText(dedupeKey);
            if (dedupe && seen.has(hash)) {
                duplicates++;
                continue;
            }
            seen.add(hash);
            const { score, reasons } = scoreExample(ex, mode);
            if (score < minScore)
                continue;
            all.push({ ...ex, score, hash, reasons });
        }
    }
    // Sort by score desc, cap
    all.sort((a, b) => b.score - a.score);
    const kept = all.slice(0, maxExamples);
    // Emit as JSONL with OpenAI-style {messages: [...]} format that train_prepare understands.
    // A single write replaces any previous dataset and always leaves a file at
    // `output`, even when nothing was kept, so the reported path is never missing.
    const body = kept.map(ex => {
        const record = { messages: ex.messages };
        if (ex.thinking)
            record.thinking = ex.thinking;
        if (ex.tool_calls)
            record.tool_calls = ex.tool_calls;
        record._score = ex.score;
        record._reasons = ex.reasons;
        return JSON.stringify(record) + '\n';
    }).join('');
    writeFileSync(output, body);
    const meanScore = kept.length > 0 ? kept.reduce((s, e) => s + e.score, 0) / kept.length : 0;
    return {
        output,
        total_examined: total,
        kept: kept.length,
        rejected: total - kept.length - duplicates,
        duplicates,
        mean_score: Math.round(meanScore * 1000) / 1000,
        by_source: bySource,
    };
}
335
/**
 * Render a curate() report as plain text: a fixed summary section followed
 * by one line per source with its example count right-aligned to 6 columns.
 */
export function formatCurateReport(r) {
    const summary = [
        `Curate Report`,
        '─'.repeat(40),
        ` Output: ${r.output}`,
        ` Examined: ${r.total_examined}`,
        ` Kept: ${r.kept}`,
        ` Rejected: ${r.rejected}`,
        ` Duplicates: ${r.duplicates}`,
        ` Mean score: ${r.mean_score.toFixed(3)}`,
        '',
        ` Sources:`,
    ];
    const perSource = Object.entries(r.by_source).map(([src, count]) => {
        const padded = count.toString().padStart(6);
        return ` ${padded} ${src}`;
    });
    return summary.concat(perSource).join('\n');
}
354
+ //# sourceMappingURL=train-curate.js.map
@@ -0,0 +1,22 @@
1
+ export interface TrainCycleOptions { // Options accepted by runCycle; every field is optional.
2
+ studentModel?: string; // Model name sent to the local Ollama chat endpoint; runCycle defaults to 'kernel-coder:latest'.
3
+ teacherProvider?: 'anthropic' | 'openai'; // Defaults to 'anthropic'. NOTE(review): runCycle only resolves an API key for 'anthropic' and always grades through the Anthropic call.
4
+ teacherModel?: string; // Model id used for grading; runCycle defaults to 'claude-opus-4-6'.
5
+ promptsFile?: string; // JSONL file of { prompt, system? } records; defaults to ~/.kbot/teacher/prompts.jsonl. When it yields nothing, prompts are harvested from teacher/traces.jsonl.
6
+ corrections?: string; // JSONL file that graded examples are appended to; defaults to ~/.kbot/teacher/corrections.jsonl.
7
+ samples?: number; // Maximum number of prompts run in one cycle; defaults to 50.
8
+ passThreshold?: number; // Minimum teacher score for a student response to count as passed; defaults to 0.6.
9
+ retrain?: boolean; // When true (and dryRun is not set), runCycle calls trainSelf({ mode: 'default' }) after the loop.
10
+ dryRun?: boolean; // When true, usable student responses are counted as passed without grading, nothing is appended, and no retrain happens.
11
+ }
12
+ export interface CycleResult { // Summary returned by runCycle.
13
+ sampled: number; // Number of prompts selected for this cycle (0 when no prompts were available).
14
+ passed: number; // Student responses scored at or above the pass threshold (in a dry run: every usable response).
15
+ corrected: number; // Responses below the threshold for which the teacher supplied a correction.
16
+ skipped: number; // Prompts with an empty/very short student response, a failed call, or a low score without a correction.
17
+ corrections_file: string; // Path of the corrections JSONL file used for this cycle.
18
+ retrain_summary?: string; // Formatted train-self report; only set when a retrain ran.
19
+ }
20
+ export declare function runCycle(opts?: TrainCycleOptions): Promise<CycleResult>; // Runs one distillation cycle: sample prompts, query the student, grade with the teacher, append examples, optionally retrain.
21
+ export declare function formatCycleReport(r: CycleResult): string; // Renders a CycleResult as a multi-line plain-text report.
22
+ //# sourceMappingURL=train-cycle.d.ts.map
@@ -0,0 +1,230 @@
1
+ // train-cycle — on-policy distillation loop (DeepSeek-R1 Distill pattern).
2
+ //
3
+ // Loop:
4
+ // 1. Sample N prompts from held-out pool (~/.kbot/teacher/prompts.jsonl)
5
+ // 2. Student (local model) generates response
6
+ // 3. Teacher (Claude) grades; if bad, teacher writes a corrected response
7
+ // 4. Pairs go back into ~/.kbot/teacher/corrections.jsonl
8
+ // 5. Optionally retrain via train-self --mode default (with corrections merged)
9
+ //
10
+ // Designed to run as a weekly cron or on-demand.
11
+ import { existsSync, readFileSync, appendFileSync } from 'node:fs';
12
+ import { join } from 'node:path';
13
+ import { homedir } from 'node:os';
14
/**
 * Load prompt records from a JSONL file.
 * Blank lines, lines that are not valid JSON, and records without a string
 * `prompt` field are dropped. A missing file yields an empty array.
 */
function readPrompts(file) {
    if (!existsSync(file))
        return [];
    const prompts = [];
    for (const rawLine of readFileSync(file, 'utf-8').split('\n')) {
        if (!rawLine.trim())
            continue;
        let entry;
        try {
            entry = JSON.parse(rawLine);
        }
        catch {
            continue;
        }
        if (entry !== null && typeof entry.prompt === 'string')
            prompts.push(entry);
    }
    return prompts;
}
28
/**
 * Auto-harvest prompts from ~/.kbot/teacher/traces.jsonl when no explicit
 * prompts file is available.
 *
 * Scans the last `limit * 2` trace lines and takes the first user message of
 * each. Message content may be a plain string or an array of content blocks;
 * for arrays the `text` of each block is joined with newlines. The length
 * filter (more than 20 and fewer than 2000 characters) is applied to the
 * resulting text, so a block array is never measured by its element count.
 *
 * @param limit Maximum number of prompts to return; values that are not
 *              greater than zero yield an empty list.
 * @returns `{ prompt, system }` records, where `system` is copied from the trace.
 */
function harvestPrompts(limit = 200) {
    // slice(-0) would select every line, so handle non-positive limits up front.
    if (!(limit > 0))
        return [];
    const traceFile = join(homedir(), '.kbot', 'teacher', 'traces.jsonl');
    if (!existsSync(traceFile))
        return [];
    const lines = readFileSync(traceFile, 'utf-8').split('\n').filter(l => l.trim());
    const out = [];
    for (const line of lines.slice(-limit * 2)) {
        if (out.length >= limit)
            break;
        try {
            const t = JSON.parse(line);
            const msgs = Array.isArray(t?.messages) ? t.messages : [];
            const firstUser = msgs.find(m => m?.role === 'user');
            if (!firstUser)
                continue;
            const raw = firstUser.content;
            let text = '';
            if (typeof raw === 'string') {
                text = raw;
            }
            else if (Array.isArray(raw)) {
                text = raw
                    .map(block => (block && typeof block.text === 'string') ? block.text : '')
                    .filter(Boolean)
                    .join('\n');
            }
            if (text.length > 20 && text.length < 2000)
                out.push({ prompt: text, system: t.system });
        }
        catch { /* skip lines that are not valid JSON */ }
    }
    return out;
}
50
/**
 * Ask the local Ollama chat endpoint for a single, non-streamed completion.
 *
 * @param model  Model name to run.
 * @param system Optional system prompt; omitted from the request when empty.
 * @param prompt User prompt.
 * @returns The reply text, or '' when the response carries none.
 * @throws Error `Ollama HTTP <status>` on a non-2xx response; the request is
 *         aborted after 300 seconds.
 */
async function callOllama(model, system, prompt) {
    const messages = [];
    if (system)
        messages.push({ role: 'system', content: system });
    messages.push({ role: 'user', content: prompt });
    const payload = {
        model,
        stream: false,
        messages,
        options: { num_predict: 2048, temperature: 0.2 },
    };
    const res = await fetch('http://localhost:11434/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(300_000),
    });
    if (!res.ok)
        throw new Error(`Ollama HTTP ${res.status}`);
    const data = await res.json();
    const reply = data.message?.content;
    return reply || '';
}
70
/**
 * Ask the Anthropic Messages API to grade a student response and, when the
 * grade is low, to supply a corrected response.
 *
 * @param apiKey          Anthropic API key.
 * @param teacherModel    Model id used for grading.
 * @param system          System prompt for the grading call; a generic grader
 *                        prompt is used when empty.
 * @param prompt          Original user prompt (first 4000 characters are sent).
 * @param studentResponse Student answer to grade (first 6000 characters are sent).
 * @param passThreshold   Score below which the teacher is asked for a
 *                        correction. Defaults to 0.6, the value previously
 *                        hard-coded in the instructions.
 * @returns `{ score, rationale, correction }`: `score` is clamped to [0, 1]
 *          (0 when missing or not a number), `correction` is `undefined` when
 *          absent or blank. When no JSON object can be recovered the score is
 *          0 and the rationale says why.
 * @throws Error `Anthropic HTTP <status>` on a non-2xx response; the request
 *         is aborted after 120 seconds.
 */
async function callAnthropicGrade(apiKey, teacherModel, system, prompt, studentResponse, passThreshold = 0.6) {
    const gradePrompt = `You are grading a student AI's response. Score 0.0–1.0 based on correctness, completeness, and helpfulness.

If the response scores below ${passThreshold}, provide a corrected response.

Return ONLY valid JSON in this exact shape:
{"score": <number>, "rationale": "<one sentence>", "correction": "<corrected response or empty string>"}

ORIGINAL PROMPT:
${prompt.slice(0, 4000)}

STUDENT RESPONSE:
${studentResponse.slice(0, 6000)}`;
    // NOTE(review): the caller passes the student's own system prompt here, so
    // the teacher grades under that persona — confirm this is intended.
    const res = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
            'anthropic-version': '2023-06-01',
        },
        body: JSON.stringify({
            model: teacherModel,
            max_tokens: 4096,
            system: system || 'You are a strict code/AI grader.',
            messages: [{ role: 'user', content: gradePrompt }],
        }),
        signal: AbortSignal.timeout(120_000),
    });
    if (!res.ok)
        throw new Error(`Anthropic HTTP ${res.status}`);
    const data = await res.json();
    const text = (data.content || []).filter(b => b.type === 'text').map(b => b.text).join('');
    // Extract the JSON object. The first candidate runs from the first '{' to
    // the last '}'. If the model added prose containing braces after the
    // object, that candidate does not parse, so the end is moved back one '}'
    // at a time until a candidate does.
    const start = text.indexOf('{');
    let end = text.lastIndexOf('}');
    if (start === -1 || end <= start)
        return { score: 0, rationale: 'no JSON in response', correction: undefined };
    for (; end > start; end = text.lastIndexOf('}', end - 1)) {
        let parsed;
        try {
            parsed = JSON.parse(text.slice(start, end + 1));
        }
        catch {
            continue;
        }
        const rawScore = typeof parsed.score === 'number' && Number.isFinite(parsed.score) ? parsed.score : 0;
        const hasCorrection = typeof parsed.correction === 'string' && parsed.correction.trim().length > 0;
        return {
            score: Math.min(1, Math.max(0, rawScore)),
            rationale: parsed.rationale || '',
            correction: hasCorrection ? parsed.correction : undefined,
        };
    }
    return { score: 0, rationale: 'JSON parse failed', correction: undefined };
}
118
/**
 * Run one on-policy distillation cycle.
 *
 * Steps: load prompts (explicit file, else harvested from teacher traces),
 * pick up to `samples` at random, let the student model answer each, have the
 * teacher grade the answer, and append either the passing student answer or
 * the teacher's correction to the corrections file. Optionally retrains
 * afterwards.
 *
 * Behaviour worth knowing:
 *  - In a dry run no teacher call is made, nothing is written and no API key
 *    is required; each usable student response counts as passed.
 *  - Only the 'anthropic' teacher provider is implemented. Any other provider
 *    is rejected up front (outside dry runs) instead of issuing requests that
 *    can only fail.
 *  - A counter is incremented only after its example was written, so a failed
 *    write is counted once (as skipped) rather than twice.
 *  - Failures for a single prompt (student call, teacher call, write) are
 *    counted as skipped and do not stop the cycle.
 *
 * @param opts See TrainCycleOptions; all fields are optional.
 * @returns Counts for the cycle plus the corrections path and, when a retrain
 *          ran, its formatted summary.
 * @throws Error when grading is required but the provider is unsupported or no
 *         Anthropic API key can be found (environment or ~/.kbot/config.json).
 */
export async function runCycle(opts = {}) {
    const studentModel = opts.studentModel ?? 'kernel-coder:latest';
    const teacherProvider = opts.teacherProvider ?? 'anthropic';
    const teacherModel = opts.teacherModel ?? 'claude-opus-4-6';
    const promptsFile = opts.promptsFile ?? join(homedir(), '.kbot', 'teacher', 'prompts.jsonl');
    const correctionsFile = opts.corrections ?? join(homedir(), '.kbot', 'teacher', 'corrections.jsonl');
    const samples = opts.samples ?? 50;
    const threshold = opts.passThreshold ?? 0.6;
    const dryRun = Boolean(opts.dryRun);
    let prompts = readPrompts(promptsFile);
    if (prompts.length === 0)
        prompts = harvestPrompts(samples * 3);
    if (prompts.length === 0) {
        return {
            sampled: 0, passed: 0, corrected: 0, skipped: 0,
            corrections_file: correctionsFile,
        };
    }
    // Shuffle + take N. Fisher–Yates gives every ordering equal probability; a
    // random sort comparator is inconsistent and produces a biased order.
    prompts = [...prompts];
    for (let i = prompts.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [prompts[i], prompts[j]] = [prompts[j], prompts[i]];
    }
    prompts = prompts.slice(0, samples);
    let passed = 0, corrected = 0, skipped = 0;
    // Pull teacher API key — only needed when grading will actually happen.
    let teacherKey = '';
    if (!dryRun) {
        if (teacherProvider !== 'anthropic')
            throw new Error(`Unsupported teacher provider "${teacherProvider}": only "anthropic" is implemented.`);
        teacherKey = process.env.ANTHROPIC_API_KEY || '';
        if (!teacherKey) {
            try {
                const cfg = JSON.parse(readFileSync(join(homedir(), '.kbot', 'config.json'), 'utf-8'));
                teacherKey = cfg.anthropic_api_key || cfg.anthropicApiKey || '';
            }
            catch { /* no config */ }
        }
        if (!teacherKey)
            throw new Error('No Anthropic API key for teacher. Set ANTHROPIC_API_KEY or run `kbot auth`.');
    }
    // Append one chat-format training example to the corrections file.
    const appendExample = (p, answer, extra) => {
        appendFileSync(correctionsFile, JSON.stringify({
            messages: [
                ...(p.system ? [{ role: 'system', content: p.system }] : []),
                { role: 'user', content: p.prompt },
                { role: 'assistant', content: answer },
            ],
            ...extra,
        }) + '\n');
    };
    for (const p of prompts) {
        try {
            const studentResp = await callOllama(studentModel, p.system || '', p.prompt);
            if (!studentResp || studentResp.length < 20) {
                skipped++;
                continue;
            }
            if (dryRun) {
                passed++;
                continue;
            }
            // The threshold is forwarded so the teacher's "correct when below X"
            // instruction matches the pass criterion applied here.
            const grade = await callAnthropicGrade(teacherKey, teacherModel, p.system || '', p.prompt, studentResp, threshold);
            if (grade.score >= threshold) {
                // Keep good student responses as training examples too
                appendExample(p, studentResp, {
                    _score: grade.score,
                    _source: 'student_passed',
                });
                passed++;
            }
            else if (grade.correction) {
                appendExample(p, grade.correction, {
                    _score: 1.0,
                    _source: 'teacher_corrected',
                    _student_score: grade.score,
                    _rationale: grade.rationale,
                });
                corrected++;
            }
            else {
                skipped++;
            }
        }
        catch {
            // Best effort: one failing prompt must not abort the cycle.
            skipped++;
        }
    }
    let retrainSummary;
    if (opts.retrain && !dryRun) {
        const { trainSelf, formatTrainSelfReport } = await import('./train-self.js');
        // Merge corrections into next dataset: curator picks them up from ~/.kbot/teacher/
        const r = await trainSelf({ mode: 'default' });
        retrainSummary = formatTrainSelfReport(r);
    }
    return {
        sampled: prompts.length,
        passed,
        corrected,
        skipped,
        corrections_file: correctionsFile,
        retrain_summary: retrainSummary,
    };
}
215
/**
 * Render a cycle result as plain text. The retrain section (a blank line,
 * the heading and the summary) is appended only when a summary is present.
 */
export function formatCycleReport(r) {
    const summary = [
        'train-cycle',
        '─'.repeat(40),
        ` Sampled: ${r.sampled}`,
        ` Passed: ${r.passed} (student got it right)`,
        ` Corrected: ${r.corrected} (teacher rewrote)`,
        ` Skipped: ${r.skipped}`,
        ` Corrections: ${r.corrections_file}`,
    ];
    const retrain = r.retrain_summary ? ['', 'Retrain:', r.retrain_summary] : [];
    return summary.concat(retrain).join('\n');
}
230
+ //# sourceMappingURL=train-cycle.js.map
@@ -0,0 +1,68 @@
1
+ export type Verifier = { // Discriminated union (tag: `kind`) describing how a completion is checked; consumed by verify(). Semantics below are inferred from names — implementation not shown here.
2
+ kind: 'build-pass';
3
+ cwd: string; // presumably the working directory for the command — TODO confirm
4
+ cmd: string; // presumably the build command whose success marks a pass — TODO confirm
5
+ } | {
6
+ kind: 'test-pass';
7
+ cwd: string; // presumably the working directory for the command — TODO confirm
8
+ cmd: string; // presumably the test command whose success marks a pass — TODO confirm
9
+ } | {
10
+ kind: 'lint-pass';
11
+ cwd: string; // presumably the working directory for the command — TODO confirm
12
+ cmd: string; // presumably the lint command whose success marks a pass — TODO confirm
13
+ } | {
14
+ kind: 'regex-match';
15
+ pattern: string; // regular-expression source as a string
16
+ flags?: string; // optional regular-expression flags; default not visible here
17
+ } | {
18
+ kind: 'json-valid';
19
+ requireKeys?: string[]; // presumably keys that must be present in the parsed JSON — TODO confirm
20
+ } | {
21
+ kind: 'custom';
22
+ script: string; // NOTE(review): unclear from the declaration whether this is a path or inline code — confirm
23
+ };
24
+ export interface GrpoPrompt { // One prompt together with the verifier used to check completions for it.
25
+ id?: string; // optional identifier; presumably surfaces as RolloutResult.prompt_id — TODO confirm
26
+ prompt: string; // prompt text
27
+ system?: string; // optional system prompt
28
+ verifier: Verifier; // check applied to completions of this prompt
29
+ tags?: string[]; // optional free-form labels
30
+ }
31
+ export interface GrpoOptions { // Options for runGrpoRollouts; only `prompts` is required. Defaults are not visible in this declaration file.
32
+ studentModel?: string; // model to sample completions from
33
+ prompts: GrpoPrompt[]; // prompts (each with its verifier) to roll out
34
+ groupSize?: number; // presumably completions sampled per prompt — TODO confirm
35
+ iters?: number; // presumably number of iterations to run — TODO confirm
36
+ learningRate?: number; // presumably forwarded to the trainer — TODO confirm
37
+ klBeta?: number; // presumably the KL-penalty coefficient — TODO confirm
38
+ outputDir?: string; // directory for outputs; see GrpoResult.output_dir
39
+ runnerCmd?: string; // NOTE(review): presumably an external trainer command — confirm
40
+ dryRun?: boolean; // NOTE(review): exact effect not visible here — confirm
41
+ }
42
+ export interface RolloutResult { // Completions and rewards recorded for a single prompt.
43
+ prompt_id: string; // identifier of the prompt these completions belong to
44
+ completions: Array<{ // one entry per sampled completion
45
+ text: string; // completion text
46
+ reward: number; // reward for this completion; verify() documents rewards as lying in [0,1]
47
+ verifier_ok: boolean; // presumably the verifier's `ok` flag for this completion — TODO confirm
48
+ }>;
49
+ advantage: number[]; // presumably one advantage value per completion, index-aligned with `completions` — TODO confirm
50
+ }
51
+ export interface GrpoResult { // Result returned by runGrpoRollouts.
52
+ ok: boolean; // overall success flag
53
+ output_dir: string; // directory associated with this run's outputs
54
+ rollouts: RolloutResult[]; // per-prompt rollout results
55
+ iterations_run: number; // number of iterations reported as run
56
+ mean_reward: number; // presumably the mean reward over all completions — TODO confirm
57
+ log: string; // log text produced by the run
58
+ }
59
+ /** Apply a verifier to a completion. Returns { ok, reward ∈ [0,1] }. */
60
+ export declare function verify(v: Verifier, completion: string): Promise<{ // asynchronous: resolves with the outcome of the check
61
+ ok: boolean; // whether the completion passed the verifier
62
+ reward: number; // reward in [0,1], per the comment above
63
+ }>;
64
+ export declare function runGrpoRollouts(opts: GrpoOptions): Promise<GrpoResult>; // Runs rollouts for opts.prompts and resolves with the aggregated result.
65
+ /** Default verifier suite for kbot: regex + JSON validity on common code gen. */
66
+ export declare const DEFAULT_VERIFIER_SUITE: GrpoPrompt[]; // ready-made prompt list that can be passed as GrpoOptions.prompts
67
+ export declare function formatGrpoReport(r: GrpoResult): string; // Renders a GrpoResult as text; format not visible in this declaration file.
68
+ //# sourceMappingURL=train-grpo.d.ts.map