@kernel.chat/kbot 3.97.4 → 3.99.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/agent.js +22 -1
  2. package/dist/cli.js +163 -0
  3. package/dist/skills-loader.d.ts +37 -5
  4. package/dist/skills-loader.js +342 -50
  5. package/dist/teacher-logger.d.ts +71 -0
  6. package/dist/teacher-logger.js +162 -0
  7. package/dist/tools/idempotency-check.d.ts +2 -0
  8. package/dist/tools/idempotency-check.js +31 -0
  9. package/dist/tools/schedule-persistence.d.ts +2 -0
  10. package/dist/tools/schedule-persistence.js +19 -0
  11. package/dist/train-agent-trace.d.ts +29 -0
  12. package/dist/train-agent-trace.js +141 -0
  13. package/dist/train-curate.d.ts +25 -0
  14. package/dist/train-curate.js +354 -0
  15. package/dist/train-cycle.d.ts +22 -0
  16. package/dist/train-cycle.js +230 -0
  17. package/dist/train-grpo.d.ts +68 -0
  18. package/dist/train-grpo.js +206 -0
  19. package/dist/train-merge.d.ts +26 -0
  20. package/dist/train-merge.js +148 -0
  21. package/dist/train-self.d.ts +38 -0
  22. package/dist/train-self.js +232 -0
  23. package/package.json +2 -1
  24. package/skills/deployment/daemon-deployment/SKILL.md +70 -0
  25. package/skills/deployment/ship-pipeline/SKILL.md +81 -0
  26. package/skills/emergent/forge-reflex/SKILL.md +53 -0
  27. package/skills/emergent/mimic-hybrid/SKILL.md +56 -0
  28. package/skills/memory/dream-to-commit/SKILL.md +52 -0
  29. package/skills/memory/memory-cascade/SKILL.md +59 -0
  30. package/skills/music-production/ableton-session-build/SKILL.md +61 -0
  31. package/skills/orchestration/cross-agent-blackboard/SKILL.md +58 -0
  32. package/skills/orchestration/specialist-routing/SKILL.md +57 -0
  33. package/skills/self-improvement/autopoiesis-loop/SKILL.md +47 -0
  34. package/skills/self-improvement/skill-self-authorship/SKILL.md +70 -0
  35. package/skills/self-improvement/teacher-trace-curation/SKILL.md +54 -0
  36. package/skills/software-development/systematic-debugging/SKILL.md +86 -0
  37. package/skills/software-development/test-driven-development/SKILL.md +74 -0
@@ -0,0 +1,230 @@
1
+ // train-cycle — on-policy distillation loop (DeepSeek-R1 Distill pattern).
2
+ //
3
+ // Loop:
4
+ // 1. Sample N prompts from held-out pool (~/.kbot/teacher/prompts.jsonl)
5
+ // 2. Student (local model) generates response
6
+ // 3. Teacher (Claude) grades; if bad, teacher writes a corrected response
7
+ // 4. Pairs go back into ~/.kbot/teacher/corrections.jsonl
8
+ // 5. Optionally retrain via train-self --mode default (with corrections merged)
9
+ //
10
+ // Designed to run as a weekly cron or on-demand.
11
+ import { existsSync, readFileSync, appendFileSync } from 'node:fs';
12
+ import { join } from 'node:path';
13
+ import { homedir } from 'node:os';
14
/**
 * Load prompt records from a JSONL file.
 *
 * Blank lines and lines that fail to parse as JSON are dropped, as are
 * parsed values that are `null` or lack a string `prompt` field. A missing
 * file yields an empty list.
 *
 * @param {string} file - Path to the JSONL prompt pool.
 * @returns {Array<object>} Parsed records, each carrying a string `prompt`.
 */
function readPrompts(file) {
    if (!existsSync(file)) {
        return [];
    }
    const records = [];
    for (const rawLine of readFileSync(file, 'utf-8').split('\n')) {
        if (!rawLine.trim()) {
            continue;
        }
        let record;
        try {
            record = JSON.parse(rawLine);
        }
        catch {
            // Malformed line: skip it and keep reading the rest of the pool.
            continue;
        }
        if (record !== null && typeof record.prompt === 'string') {
            records.push(record);
        }
    }
    return records;
}
28
/**
 * Auto-harvest prompts from recorded teacher traces when no explicit prompt
 * file is available.
 *
 * Only the last `limit * 2` non-blank trace lines are scanned. For each trace
 * the first message with role `user` is used, provided its content is a
 * string strictly between 20 and 2000 characters long. Unparseable lines and
 * traces without a usable user message are skipped.
 *
 * @param {number} [limit=200] - Maximum number of prompts to return; values
 *   that are not greater than zero yield an empty list.
 * @param {string} [traceFile] - JSONL trace file; defaults to
 *   `~/.kbot/teacher/traces.jsonl`.
 * @returns {Array<{prompt: string, system: *}>} Harvested prompts in file order.
 */
function harvestPrompts(limit = 200, traceFile = join(homedir(), '.kbot', 'teacher', 'traces.jsonl')) {
    // `slice(-0)` would select the whole file, so reject non-positive limits up front.
    if (!(limit > 0) || !existsSync(traceFile)) {
        return [];
    }
    const lines = readFileSync(traceFile, 'utf-8').split('\n').filter((l) => l.trim());
    const out = [];
    for (const line of lines.slice(-limit * 2)) {
        let trace;
        try {
            trace = JSON.parse(line);
        }
        catch {
            continue; // skip malformed trace lines
        }
        const msgs = Array.isArray(trace?.messages) ? trace.messages : [];
        const content = msgs.find((m) => m?.role === 'user')?.content;
        // Non-string content (e.g. an array of content blocks) cannot be used as a prompt.
        if (typeof content !== 'string') {
            continue;
        }
        if (content.length > 20 && content.length < 2000) {
            out.push({ prompt: content, system: trace.system });
            if (out.length >= limit) {
                break;
            }
        }
    }
    return out;
}
50
/**
 * Ask the local Ollama chat endpoint for a single, non-streamed completion.
 *
 * @param {string} model - Ollama model tag.
 * @param {string} system - System prompt; omitted from the request when falsy.
 * @param {string} prompt - User message.
 * @returns {Promise<string>} Assistant text, or `''` when the reply has none.
 * @throws {Error} `Ollama HTTP <status>` on a non-2xx response.
 */
async function callOllama(model, system, prompt) {
    const messages = [];
    if (system) {
        messages.push({ role: 'system', content: system });
    }
    messages.push({ role: 'user', content: prompt });
    const payload = {
        model,
        stream: false,
        messages,
        options: { num_predict: 2048, temperature: 0.2 },
    };
    const response = await fetch('http://localhost:11434/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
        // Abort after five minutes.
        signal: AbortSignal.timeout(300_000),
    });
    if (!response.ok) {
        throw new Error(`Ollama HTTP ${response.status}`);
    }
    const reply = await response.json();
    return reply.message?.content || '';
}
70
/**
 * Turn the grader's free-text reply into a normalized grade.
 *
 * The first `{` through the last `}` is parsed as JSON. The score is clamped
 * to [0, 1] (non-numeric or non-finite scores count as 0), the rationale must
 * be a string, and a correction is kept only when it is a non-blank string,
 * so an empty or malformed correction can never become a training answer.
 *
 * @param {string} text - Concatenated text blocks of the grader's reply.
 * @returns {{score: number, rationale: string, correction: (string|undefined)}}
 */
function parseGradeReply(text) {
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return { score: 0, rationale: 'no JSON in response', correction: undefined };
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        return { score: 0, rationale: 'JSON parse failed', correction: undefined };
    }
    const rawScore = typeof parsed?.score === 'number' && Number.isFinite(parsed.score) ? parsed.score : 0;
    const correction = typeof parsed?.correction === 'string' && parsed.correction.trim() !== ''
        ? parsed.correction
        : undefined;
    return {
        score: Math.min(1, Math.max(0, rawScore)),
        rationale: typeof parsed?.rationale === 'string' ? parsed.rationale : '',
        correction,
    };
}
/**
 * Ask the Anthropic Messages API to grade a student response and, for weak
 * responses, to supply a corrected one.
 *
 * The prompt is truncated to 4000 characters and the student response to 6000
 * before being embedded in the grading request.
 *
 * @param {string} apiKey - Anthropic API key, sent as `x-api-key`.
 * @param {string} teacherModel - Model id used for grading.
 * @param {string} system - System prompt for the request; a generic grader
 *   prompt is used when falsy.
 * @param {string} prompt - The original user prompt.
 * @param {string} studentResponse - The student model's answer.
 * @returns {Promise<{score: number, rationale: string, correction: (string|undefined)}>}
 *   Grade with `score` in [0, 1]. Unparseable replies yield score 0.
 * @throws {Error} `Anthropic HTTP <status>` on a non-2xx response.
 */
async function callAnthropicGrade(apiKey, teacherModel, system, prompt, studentResponse) {
    const gradePrompt = `You are grading a student AI's response. Score 0.0–1.0 based on correctness, completeness, and helpfulness.

If the response scores below 0.6, provide a corrected response.

Return ONLY valid JSON in this exact shape:
{"score": <number>, "rationale": "<one sentence>", "correction": "<corrected response or empty string>"}

ORIGINAL PROMPT:
${prompt.slice(0, 4000)}

STUDENT RESPONSE:
${studentResponse.slice(0, 6000)}`;
    const res = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
            'anthropic-version': '2023-06-01',
        },
        body: JSON.stringify({
            model: teacherModel,
            max_tokens: 4096,
            // NOTE(review): the task's own system prompt becomes the grader's system
            // prompt here — confirm this is intended rather than always using the
            // grader persona.
            system: system || 'You are a strict code/AI grader.',
            messages: [{ role: 'user', content: gradePrompt }],
        }),
        signal: AbortSignal.timeout(120_000),
    });
    if (!res.ok) {
        throw new Error(`Anthropic HTTP ${res.status}`);
    }
    const data = await res.json();
    const text = (data.content || [])
        .filter((b) => b.type === 'text')
        .map((b) => b.text)
        .join('');
    return parseGradeReply(text);
}
118
/**
 * Serialize one chat-format training example as a JSONL line.
 *
 * @param {{prompt: string, system?: string}} p - Source prompt record.
 * @param {string} assistantContent - Text stored as the assistant turn.
 * @param {object} meta - Extra underscore-prefixed bookkeeping fields.
 * @returns {string} JSON text terminated by a newline.
 */
function buildTrainingLine(p, assistantContent, meta) {
    return JSON.stringify({
        messages: [
            ...(p.system ? [{ role: 'system', content: p.system }] : []),
            { role: 'user', content: p.prompt },
            { role: 'assistant', content: assistantContent },
        ],
        ...meta,
    }) + '\n';
}
/**
 * Run one on-policy distillation cycle: sample prompts, let the student model
 * answer, have the teacher grade, and append passing or corrected examples to
 * the corrections file.
 *
 * Prompts come from `promptsFile`, falling back to prompts harvested from
 * teacher traces. Per-prompt failures (student call, teacher call, file
 * append) are counted as skipped rather than aborting the cycle.
 *
 * @param {object} [opts]
 * @param {string} [opts.studentModel='kernel-coder:latest'] - Ollama model tag.
 * @param {string} [opts.teacherProvider='anthropic'] - Only `anthropic` is implemented.
 * @param {string} [opts.teacherModel='claude-opus-4-6'] - Teacher model id.
 * @param {string} [opts.promptsFile] - Defaults to `~/.kbot/teacher/prompts.jsonl`.
 * @param {string} [opts.corrections] - Defaults to `~/.kbot/teacher/corrections.jsonl`.
 * @param {number} [opts.samples=50] - Number of prompts to sample.
 * @param {number} [opts.passThreshold=0.6] - Minimum teacher score that counts as a pass.
 * @param {boolean} [opts.dryRun] - Only exercise the student; no grading, no
 *   writes, no retrain, and no teacher API key required.
 * @param {boolean} [opts.retrain] - Run `trainSelf({ mode: 'default' })` afterwards.
 * @returns {Promise<object>} Counts (`sampled`, `passed`, `corrected`,
 *   `skipped`), `corrections_file`, and `retrain_summary` when retraining ran.
 * @throws {Error} When grading is requested with an unsupported provider or
 *   without an Anthropic API key.
 */
export async function runCycle(opts = {}) {
    const studentModel = opts.studentModel ?? 'kernel-coder:latest';
    const teacherProvider = opts.teacherProvider ?? 'anthropic';
    const teacherModel = opts.teacherModel ?? 'claude-opus-4-6';
    const promptsFile = opts.promptsFile ?? join(homedir(), '.kbot', 'teacher', 'prompts.jsonl');
    const correctionsFile = opts.corrections ?? join(homedir(), '.kbot', 'teacher', 'corrections.jsonl');
    const samples = opts.samples ?? 50;
    const threshold = opts.passThreshold ?? 0.6;
    const dryRun = Boolean(opts.dryRun);
    let pool = readPrompts(promptsFile);
    if (pool.length === 0) {
        pool = harvestPrompts(samples * 3);
    }
    if (pool.length === 0) {
        return {
            sampled: 0, passed: 0, corrected: 0, skipped: 0,
            corrections_file: correctionsFile,
        };
    }
    // Fisher–Yates shuffle on a copy: a random sort comparator is not a uniform shuffle.
    const shuffled = [...pool];
    for (let i = shuffled.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    const prompts = shuffled.slice(0, Math.max(0, samples));
    let passed = 0;
    let corrected = 0;
    let skipped = 0;
    // The teacher is never contacted in a dry run, so only resolve credentials when grading.
    let teacherKey = '';
    if (!dryRun) {
        if (teacherProvider !== 'anthropic') {
            throw new Error(`Unsupported teacher provider "${teacherProvider}"; only "anthropic" is implemented.`);
        }
        teacherKey = process.env.ANTHROPIC_API_KEY || '';
        if (!teacherKey) {
            try {
                const cfg = JSON.parse(readFileSync(join(homedir(), '.kbot', 'config.json'), 'utf-8'));
                teacherKey = cfg.anthropic_api_key || cfg.anthropicApiKey || '';
            }
            catch { /* no readable config; fall through to the error below */ }
        }
        if (!teacherKey) {
            throw new Error('No Anthropic API key for teacher. Set ANTHROPIC_API_KEY or run `kbot auth`.');
        }
    }
    for (const p of prompts) {
        try {
            const studentResp = await callOllama(studentModel, p.system || '', p.prompt);
            if (!studentResp || studentResp.length < 20) {
                skipped++;
                continue;
            }
            if (dryRun) {
                passed++;
                continue;
            }
            const grade = await callAnthropicGrade(teacherKey, teacherModel, p.system || '', p.prompt, studentResp);
            if (grade.score >= threshold) {
                passed++;
                // Keep good student responses as training examples too.
                appendFileSync(correctionsFile, buildTrainingLine(p, studentResp, {
                    _score: grade.score,
                    _source: 'student_passed',
                }));
            }
            else if (grade.correction) {
                corrected++;
                appendFileSync(correctionsFile, buildTrainingLine(p, grade.correction, {
                    _score: 1.0,
                    _source: 'teacher_corrected',
                    _student_score: grade.score,
                    _rationale: grade.rationale,
                }));
            }
            else {
                skipped++;
            }
        }
        catch {
            // Best effort: one failing prompt must not abort the whole cycle.
            skipped++;
        }
    }
    let retrainSummary;
    if (opts.retrain && !dryRun) {
        const { trainSelf, formatTrainSelfReport } = await import('./train-self.js');
        // Merge corrections into next dataset: curator picks them up from ~/.kbot/teacher/
        const r = await trainSelf({ mode: 'default' });
        retrainSummary = formatTrainSelfReport(r);
    }
    return {
        sampled: prompts.length,
        passed,
        corrected,
        skipped,
        corrections_file: correctionsFile,
        retrain_summary: retrainSummary,
    };
}
215
/**
 * Render a cycle result as a plain-text report.
 *
 * @param {object} r - Result returned by `runCycle`.
 * @returns {string} Newline-joined report; a "Retrain:" section is appended
 *   only when `r.retrain_summary` is truthy.
 */
export function formatCycleReport(r) {
    const rows = [
        ['Sampled', `${r.sampled}`],
        ['Passed', `${r.passed} (student got it right)`],
        ['Corrected', `${r.corrected} (teacher rewrote)`],
        ['Skipped', `${r.skipped}`],
        ['Corrections', `${r.corrections_file}`],
    ];
    const report = ['train-cycle', '─'.repeat(40)];
    for (const [label, value] of rows) {
        report.push(` ${label}: ${value}`);
    }
    if (r.retrain_summary) {
        report.push('', 'Retrain:', r.retrain_summary);
    }
    return report.join('\n');
}
230
+ //# sourceMappingURL=train-cycle.js.map
@@ -0,0 +1,68 @@
1
/**
 * Describes how a completion is checked. `verify` switches on `kind`.
 */
export type Verifier = {
    // build-pass / test-pass / lint-pass are handled identically by `verify`:
    // the completion's first fenced code block (or the whole completion) is
    // written to a scratch directory, `cmd` is run through the shell, and the
    // reward is 1 when the command succeeds, otherwise 0.
    kind: 'build-pass';
    // Working directory for `cmd`.
    cwd: string;
    // Shell command to run.
    cmd: string;
} | {
    kind: 'test-pass';
    cwd: string;
    cmd: string;
} | {
    kind: 'lint-pass';
    cwd: string;
    cmd: string;
} | {
    // Passes when `new RegExp(pattern, flags)` matches the completion.
    kind: 'regex-match';
    pattern: string;
    flags?: string;
} | {
    // Passes when JSON extracted from the completion parses; a missing
    // required key fails with a partial reward of 0.3.
    kind: 'json-valid';
    requireKeys?: string[];
} | {
    // Runs the executable at `script` with the completion on stdin;
    // exit status 0 passes.
    kind: 'custom';
    script: string;
};
24
/** One prompt together with the verifier used to score its completions. */
export interface GrpoPrompt {
    // Identifier recorded with the rollouts; a random UUID is generated when omitted.
    id?: string;
    // User message sent to the student model.
    prompt: string;
    // Optional system message sent ahead of the prompt.
    system?: string;
    // How each completion for this prompt is scored.
    verifier: Verifier;
    // NOTE(review): not read by the rollout code visible in this package diff.
    tags?: string[];
}
31
/** Options accepted by `runGrpoRollouts`. */
export interface GrpoOptions {
    // Ollama model tag; defaults to 'kernel-coder:latest'.
    studentModel?: string;
    // Prompts to roll out.
    prompts: GrpoPrompt[];
    // Completions generated per prompt; defaults to 8.
    groupSize?: number;
    // Written to grpo-config.json as `iters`; defaults to 100.
    iters?: number;
    // Written to grpo-config.json as `learning_rate`; defaults to 5e-6.
    learningRate?: number;
    // Written to grpo-config.json as `kl_beta`; defaults to 0.05.
    klBeta?: number;
    // Where rollouts.jsonl and grpo-config.json are written; defaults to
    // ~/.kbot/teacher/grpo/run-<timestamp>.
    outputDir?: string;
    // External runner, invoked as `<runnerCmd> --config <path to grpo-config.json>`.
    runnerCmd?: string;
    // When true the external runner is not invoked.
    dryRun?: boolean;
}
42
/** All completions generated for one prompt, with their rewards. */
export interface RolloutResult {
    // The prompt's `id`, or the UUID generated for it.
    prompt_id: string;
    // One entry per attempt; a failed attempt is recorded with empty text and reward 0.
    completions: Array<{
        text: string;
        reward: number;
        verifier_ok: boolean;
    }>;
    // Group-relative advantage per completion, index-aligned with `completions`.
    advantage: number[];
}
51
/** Summary returned by `runGrpoRollouts`. */
export interface GrpoResult {
    // Overall status flag shown as OK / FAIL by `formatGrpoReport`.
    ok: boolean;
    // Directory containing rollouts.jsonl and grpo-config.json.
    output_dir: string;
    // One entry per prompt.
    rollouts: RolloutResult[];
    // NOTE(review): the rollout code in this package always reports 0 here.
    iterations_run: number;
    // Mean reward over all attempts, rounded to three decimals.
    mean_reward: number;
    // Human-readable summary, plus the runner's trailing output when a runner was invoked.
    log: string;
}
59
/**
 * Apply a verifier to a completion. Returns { ok, reward ∈ [0,1] }.
 *
 * @param v - Verifier describing how to check the completion.
 * @param completion - Raw model output to check.
 */
export declare function verify(v: Verifier, completion: string): Promise<{
    ok: boolean;
    reward: number;
}>;
64
/**
 * Generate and score a group of completions for each prompt, persist them to
 * rollouts.jsonl plus a grpo-config.json, and optionally invoke an external runner.
 */
export declare function runGrpoRollouts(opts: GrpoOptions): Promise<GrpoResult>;
/** Default verifier suite for kbot: regex + JSON validity on common code gen. */
export declare const DEFAULT_VERIFIER_SUITE: GrpoPrompt[];
/** Render a `GrpoResult` as a plain-text report. */
export declare function formatGrpoReport(r: GrpoResult): string;
68
+ //# sourceMappingURL=train-grpo.d.ts.map
@@ -0,0 +1,206 @@
1
+ // train-grpo — Group Relative Policy Optimization scaffolding.
2
+ // GRPO generates N completions per prompt, scores with a verifiable reward,
3
+ // and reinforces the best. No reward model needed — the oracle is the verifier.
4
+ //
5
+ // Suitable verifiers for kbot:
6
+ // build-pass — does the emitted code compile / npm run build succeed?
7
+ // test-pass — does `npm test` / vitest succeed on the generated change?
8
+ // lint-pass — eslint / tsc --noEmit
9
+ // regex-match — output contains a required pattern
10
+ // json-valid — output parses as JSON and matches schema
11
+ //
12
+ // This module writes GRPO config + delegates to an external GRPO runner
13
+ // (mlx-grpo / trl-mlx / custom). Runner selection is pluggable.
14
import { execSync, spawnSync } from 'node:child_process';
import { randomUUID } from 'node:crypto';
import { appendFileSync, existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
import { homedir, tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
19
/**
 * Run a command through the system shell and capture its output.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} [cwd] - Working directory for the command.
 * @param {number} [timeout=120000] - Timeout in milliseconds.
 * @returns {{ok: boolean, output: string}} On success `output` is stdout; on
 *   failure it is stdout, stderr and the error message joined by newlines.
 */
function shell(cmd, cwd, timeout = 120_000) {
    const execOptions = {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
        maxBuffer: 20 * 1024 * 1024,
        timeout,
        cwd,
    };
    let stdout;
    try {
        stdout = execSync(cmd, execOptions);
    }
    catch (err) {
        const failure = err;
        const parts = [failure.stdout, failure.stderr, failure.message].filter(Boolean);
        return { ok: (failure.status === 0), output: parts.join('\n') };
    }
    return { ok: true, output: stdout.toString() };
}
35
/**
 * Sample one completion from the local Ollama chat endpoint.
 *
 * @param {string} model - Ollama model tag.
 * @param {string} system - System prompt; omitted from the request when falsy.
 * @param {string} prompt - User message.
 * @param {number} [temperature=0.8] - Sampling temperature.
 * @returns {Promise<string>} Assistant text, or `''` when the reply has none.
 * @throws {Error} `Ollama HTTP <status>` on a non-2xx response.
 */
async function generateRollout(model, system, prompt, temperature = 0.8) {
    const messages = [];
    if (system) {
        messages.push({ role: 'system', content: system });
    }
    messages.push({ role: 'user', content: prompt });
    const payload = {
        model,
        stream: false,
        messages,
        options: { num_predict: 2048, temperature },
    };
    const response = await fetch('http://localhost:11434/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
        // Abort after three minutes.
        signal: AbortSignal.timeout(180_000),
    });
    if (!response.ok) {
        throw new Error(`Ollama HTTP ${response.status}`);
    }
    const reply = await response.json();
    return reply.message?.content || '';
}
55
/**
 * Apply a verifier to a completion. Returns { ok, reward ∈ [0,1] }.
 *
 * - `regex-match`: passes when the pattern matches; an invalid pattern fails.
 * - `json-valid`: extracts a ```json block or a bare `{...}` span and parses
 *   it; a missing required key fails with partial reward 0.3.
 * - `build-pass` / `test-pass` / `lint-pass`: writes the first fenced code
 *   block (or the whole completion) to a scratch directory, runs `v.cmd`,
 *   and removes the scratch directory afterwards.
 * - `custom`: runs the script with the completion on stdin; exit 0 passes.
 * - any other kind fails with reward 0.
 *
 * @param {object} v - Verifier descriptor (see the `Verifier` type).
 * @param {string} completion - Raw model output.
 * @returns {Promise<{ok: boolean, reward: number}>}
 */
export async function verify(v, completion) {
    switch (v.kind) {
        case 'regex-match': {
            let matched;
            try {
                // Test exactly once: with `g`/`y` flags a second `.test()` resumes
                // from `lastIndex` and can disagree with the first call.
                matched = new RegExp(v.pattern, v.flags).test(completion);
            }
            catch {
                return { ok: false, reward: 0 };
            }
            return { ok: matched, reward: matched ? 1 : 0 };
        }
        case 'json-valid': {
            // Try to extract JSON from ```json blocks or bare object
            const match = completion.match(/```json\s*([\s\S]*?)```/) || completion.match(/\{[\s\S]*\}/);
            if (!match) {
                return { ok: false, reward: 0 };
            }
            try {
                const parsed = JSON.parse(match[1] || match[0]);
                if (v.requireKeys) {
                    for (const k of v.requireKeys) {
                        // `in` throws for non-object JSON values; the catch below scores that as 0.
                        if (!(k in parsed)) {
                            return { ok: false, reward: 0.3 };
                        }
                    }
                }
                return { ok: true, reward: 1 };
            }
            catch {
                return { ok: false, reward: 0 };
            }
        }
        case 'build-pass':
        case 'test-pass':
        case 'lint-pass': {
            // Extract code blocks from completion, write to a scratch dir, run the command
            const scratch = join(tmpdir(), `grpo-${randomUUID()}`);
            mkdirSync(scratch, { recursive: true });
            try {
                const codeMatch = completion.match(/```[a-zA-Z]*\n([\s\S]*?)```/);
                const code = codeMatch ? codeMatch[1] : completion;
                writeFileSync(join(scratch, 'out.txt'), code);
                // NOTE(review): when `v.cwd` is set the command runs there and is not told
                // where the scratch file lives — confirm how `cmd` is meant to find it.
                const r = shell(v.cmd, v.cwd || scratch, 180_000);
                return { ok: r.ok, reward: r.ok ? 1 : 0 };
            }
            finally {
                // One scratch directory per rollout adds up quickly; always clean it up.
                rmSync(scratch, { recursive: true, force: true });
            }
        }
        case 'custom': {
            const script = resolve(v.script);
            if (!existsSync(script)) {
                return { ok: false, reward: 0 };
            }
            const res = spawnSync(script, [], { input: completion, encoding: 'utf-8', timeout: 60_000 });
            const passed = res.status === 0;
            return { ok: passed, reward: passed ? 1 : 0 };
        }
        default:
            // Unknown verifier kinds fail instead of returning undefined.
            return { ok: false, reward: 0 };
    }
}
107
/**
 * Compute group-relative advantages: (reward - group_mean) / group_std.
 *
 * Uses the population standard deviation; when every reward is identical the
 * divisor falls back to 1e-6 so the result is all zeros rather than NaN.
 *
 * @param {number[]} rewards - Rewards of one prompt's completions.
 * @returns {number[]} Advantage per reward, same order.
 */
function advantages(rewards) {
    const count = rewards.length;
    if (count === 0) {
        return [];
    }
    let sum = 0;
    for (const reward of rewards) {
        sum += reward;
    }
    const mean = sum / count;
    let squaredDeviation = 0;
    for (const reward of rewards) {
        squaredDeviation += (reward - mean) ** 2;
    }
    const std = Math.sqrt(squaredDeviation / count) || 1e-6;
    return rewards.map((reward) => (reward - mean) / std);
}
116
/**
 * Wrap a value in double quotes for interpolation into a shell command so
 * paths containing whitespace survive. Embedded double quotes are escaped;
 * other shell metacharacters are not handled.
 *
 * @param {string} value
 * @returns {string}
 */
function quoteShellArg(value) {
    return `"${String(value).replace(/"/g, '\\"')}"`;
}
/**
 * Generate `groupSize` completions per prompt, score each with the prompt's
 * verifier, and persist the scored group to `rollouts.jsonl`. A
 * `grpo-config.json` is written for an external GRPO runner, which is invoked
 * when `opts.runnerCmd` is set and `opts.dryRun` is not.
 *
 * A failed generation or verification is recorded as an empty completion
 * with reward 0 and still counts toward the mean reward.
 *
 * @param {object} opts - See the `GrpoOptions` type.
 * @returns {Promise<object>} `GrpoResult`; `ok` is false only when the
 *   external runner was invoked and failed.
 */
export async function runGrpoRollouts(opts) {
    const studentModel = opts.studentModel ?? 'kernel-coder:latest';
    const groupSize = opts.groupSize ?? 8;
    const outputDir = opts.outputDir ?? join(homedir(), '.kbot', 'teacher', 'grpo', `run-${Date.now()}`);
    // Recursive mkdir is a no-op when the directory already exists.
    mkdirSync(outputDir, { recursive: true });
    const rolloutsPath = join(outputDir, 'rollouts.jsonl');
    const configPath = join(outputDir, 'grpo-config.json');
    const rollouts = [];
    let totalReward = 0;
    let totalCount = 0;
    for (const p of opts.prompts ?? []) {
        const id = p.id ?? randomUUID();
        const completions = [];
        for (let i = 0; i < groupSize; i++) {
            try {
                const text = await generateRollout(studentModel, p.system || '', p.prompt, 0.8);
                const v = await verify(p.verifier, text);
                completions.push({ text, reward: v.reward, verifier_ok: v.ok });
                totalReward += v.reward;
            }
            catch {
                // Keep the group at full size so advantages stay index-aligned.
                completions.push({ text: '', reward: 0, verifier_ok: false });
            }
            totalCount++;
        }
        const adv = advantages(completions.map((c) => c.reward));
        rollouts.push({ prompt_id: id, completions, advantage: adv });
        // Persist rollout to JSONL (consumed by GRPO updater)
        appendFileSync(rolloutsPath, JSON.stringify({
            id, prompt: p.prompt, system: p.system,
            rollouts: completions.map((c, k) => ({ text: c.text, reward: c.reward, advantage: adv[k] })),
        }) + '\n');
    }
    const meanReward = totalCount > 0 ? totalReward / totalCount : 0;
    // Write GRPO config for external runner
    const grpoConfig = {
        student_model: studentModel,
        group_size: groupSize,
        iters: opts.iters ?? 100,
        learning_rate: opts.learningRate ?? 5e-6,
        kl_beta: opts.klBeta ?? 0.05,
        rollouts_path: rolloutsPath,
        output_dir: outputDir,
    };
    writeFileSync(configPath, JSON.stringify(grpoConfig, null, 2));
    let log = `Wrote ${rollouts.length} prompt groups × ${groupSize} rollouts to ${outputDir}`;
    let ok = true;
    if (!opts.dryRun && opts.runnerCmd) {
        // Quote the path: the default output directory lives under the home
        // directory, which may contain spaces.
        const r = shell(`${opts.runnerCmd} --config ${quoteShellArg(configPath)}`);
        // Surface runner failures instead of always reporting success.
        ok = r.ok;
        log += '\n' + r.output.split('\n').slice(-10).join('\n');
    }
    else if (!opts.dryRun) {
        log += '\nNo --runner-cmd given. Rollouts collected; invoke an external GRPO runner on rollouts.jsonl.';
    }
    return {
        ok,
        output_dir: outputDir,
        rollouts,
        // The external runner does not report its progress back, so this stays 0.
        iterations_run: 0,
        mean_reward: Math.round(meanReward * 1000) / 1000,
        log,
    };
}
178
/**
 * Default verifier suite for kbot: regex + JSON validity on common code gen.
 *
 * Each entry pairs a prompt with the verifier used to score its completions.
 * The regex verifiers only check that the expected function signature
 * appears in the output; they do not execute the generated code.
 */
export const DEFAULT_VERIFIER_SUITE = [
    {
        prompt: 'Write a TypeScript function `fib(n: number): number` that returns the nth Fibonacci number. Return only the function, in a ```typescript code block.',
        // Case-insensitive match on the `function fib(n: number` signature.
        verifier: { kind: 'regex-match', pattern: 'function fib\\s*\\(\\s*n\\s*:\\s*number', flags: 'i' },
    },
    {
        prompt: 'Return a JSON object with keys "name" (string) and "version" (string) for a hypothetical npm package called "example-tool" at version 1.0.0. Only JSON, no prose.',
        // Checks that the keys are present, not their values.
        verifier: { kind: 'json-valid', requireKeys: ['name', 'version'] },
    },
    {
        prompt: 'Write a Python function `def is_prime(n: int) -> bool:` that returns True if n is prime. Return only the function in a ```python code block.',
        // Case-insensitive match on `def is_prime(`.
        verifier: { kind: 'regex-match', pattern: 'def is_prime\\s*\\(', flags: 'i' },
    },
];
193
/**
 * Render a GRPO rollout result as a plain-text report.
 *
 * @param {object} r - Result returned by `runGrpoRollouts`.
 * @returns {string} Newline-joined report ending with the run log.
 */
export function formatGrpoReport(r) {
    const status = r.ok ? 'OK' : 'FAIL';
    const report = ['train-grpo', '─'.repeat(40)];
    report.push(` Status: ${status}`);
    report.push(` Output dir: ${r.output_dir}`);
    report.push(` Prompt groups: ${r.rollouts.length}`);
    report.push(` Mean reward: ${r.mean_reward.toFixed(3)}`);
    report.push('', `Log:`, r.log);
    return report.join('\n');
}
206
+ //# sourceMappingURL=train-grpo.js.map
@@ -0,0 +1,26 @@
1
/** Merge algorithm written to the MergeKit config as `merge_method`. */
export type MergeMethod = 'ties' | 'slerp' | 'dare_ties' | 'linear' | 'passthrough';
/** Options accepted by `mergeModels`. */
export interface MergeOptions {
    // Defaults to 'ties'.
    method?: MergeMethod;
    // Written to the config as `base_model`.
    baseModel: string;
    // Models to blend; `weight` and `density` are emitted as per-model
    // parameters only when provided.
    models: Array<{
        model: string;
        weight?: number;
        density?: number;
    }>;
    // Defaults to `kernel-merged-<timestamp>`; also used as the Ollama model name on deploy.
    outputName?: string;
    // Defaults to ~/.kbot/teacher/merges/<outputName>.
    outputDir?: string;
    // Defaults to 'bfloat16'.
    dtype?: 'float16' | 'bfloat16' | 'float32';
    // When true and the merge succeeds, register the result with `ollama create`.
    deploy?: boolean;
}
15
/** Outcome of `mergeModels`. */
export interface MergeResult {
    // False when MergeKit is not installed or the merge command failed.
    ok: boolean;
    // Directory holding the config and the merged model.
    output_dir: string;
    // Path of the generated merge-config.yaml.
    config_path: string;
    // Set only when a deploy to Ollama was attempted.
    ollama_name?: string;
    // Tail of the merge command's output, or an install hint when MergeKit is missing.
    log: string;
}
22
/**
 * Write a MergeKit YAML config for `opts`, run `mergekit-yaml` on it, and
 * optionally register the merged model with Ollama.
 */
export declare function mergeModels(opts: MergeOptions): Promise<MergeResult>;
/** Convenience: sensible default TIES blend for kbot. */
export declare function mergeKbotDefault(): Promise<MergeResult>;
/** Render a `MergeResult` as a plain-text report. */
export declare function formatMergeReport(r: MergeResult): string;
26
+ //# sourceMappingURL=train-merge.d.ts.map
@@ -0,0 +1,148 @@
1
+ // train-merge — model merging via MergeKit (TIES / SLERP / DARE).
2
+ // Also documents the MoE swap path (DeepSeek-V2-Lite-16B, Qwen3-MoE).
3
+ //
4
+ // MergeKit must be installed: pip install mergekit
5
+ import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
6
+ import { join } from 'node:path';
7
+ import { homedir } from 'node:os';
8
+ import { execSync } from 'node:child_process';
9
/**
 * Report whether an executable can be located with `which`.
 *
 * @param {string} bin - Executable name to look up.
 * @returns {boolean} True when `which <bin>` exits successfully.
 */
function hasBin(bin) {
    let found = true;
    try {
        execSync(`which ${bin}`, { stdio: 'ignore' });
    }
    catch {
        // Any failure of the lookup is treated as "not installed".
        found = false;
    }
    return found;
}
18
/**
 * Run a long-lived command through the system shell and capture its output.
 * The timeout is one hour and the output buffer 100 MiB.
 *
 * @param {string} cmd - Command line to execute.
 * @returns {{ok: boolean, output: string}} On success `output` is stdout; on
 *   failure it is stdout, stderr and the error message joined by newlines.
 */
function shell(cmd) {
    const execOptions = {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
        maxBuffer: 100 * 1024 * 1024,
        timeout: 60 * 60 * 1000,
    };
    let stdout;
    try {
        stdout = execSync(cmd, execOptions);
    }
    catch (err) {
        const failure = err;
        const parts = [failure.stdout, failure.stderr, failure.message].filter(Boolean);
        return { ok: false, output: parts.join('\n') };
    }
    return { ok: true, output: stdout.toString() };
}
33
/**
 * Generate a MergeKit YAML config.
 *
 * YAML nesting is expressed through indentation, so every nested key is
 * emitted with explicit two-space steps: list items under `models:` /
 * `sources:`, and per-model `parameters:` beneath the model they belong to.
 *
 * @param {object} opts - See the `MergeOptions` type.
 * @returns {string} YAML document text.
 */
function buildConfig(opts) {
    const method = opts.method ?? 'ties';
    const dtype = opts.dtype ?? 'bfloat16';
    if (method === 'slerp') {
        // SLERP requires exactly 2 models and uses 't' parameter.
        // NOTE(review): the model count is not validated here and the layer
        // range is fixed at [0, 32] — confirm it matches the models being merged.
        return [
            'slices:',
            '  - sources:',
            ...opts.models.flatMap((m) => [
                `      - model: ${m.model}`,
                '        layer_range: [0, 32]',
            ]),
            'merge_method: slerp',
            `base_model: ${opts.baseModel}`,
            'parameters:',
            '  t:',
            '    - filter: self_attn',
            '      value: [0, 0.5, 0.3, 0.7, 1]',
            '    - filter: mlp',
            '      value: [1, 0.5, 0.7, 0.3, 0]',
            '    - value: 0.5',
            `dtype: ${dtype}`,
        ].join('\n');
    }
    const modelLines = opts.models.flatMap((m) => {
        const lines = [`  - model: ${m.model}`];
        const params = [];
        if (m.weight != null) {
            params.push(`weight: ${m.weight}`);
        }
        if (m.density != null) {
            params.push(`density: ${m.density}`);
        }
        if (params.length > 0) {
            lines.push('    parameters:', ...params.map((p) => `      ${p}`));
        }
        return lines;
    });
    return [
        'models:',
        ...modelLines,
        `merge_method: ${method}`,
        `base_model: ${opts.baseModel}`,
        'parameters:',
        '  normalize: true',
        `dtype: ${dtype}`,
    ].join('\n');
}
74
/**
 * Wrap a value in double quotes for interpolation into a shell command so
 * paths containing whitespace survive. Embedded double quotes are escaped;
 * other shell metacharacters are not handled.
 *
 * @param {string} value
 * @returns {string}
 */
function quotePath(value) {
    return `"${String(value).replace(/"/g, '\\"')}"`;
}
/**
 * Merge models with MergeKit and optionally register the result with Ollama.
 *
 * The YAML config is always written to `<outputDir>/merge-config.yaml`, even
 * when MergeKit is missing, so it can be inspected or run by hand.
 *
 * @param {object} opts - See the `MergeOptions` type.
 * @returns {Promise<object>} `MergeResult`. `ok` reflects the merge itself;
 *   `ollama_name` is set only when `ollama create` succeeded, and a failed
 *   deploy is reported in `log`.
 */
export async function mergeModels(opts) {
    const outputName = opts.outputName ?? `kernel-merged-${Date.now()}`;
    const outputDir = opts.outputDir ?? join(homedir(), '.kbot', 'teacher', 'merges', outputName);
    if (!existsSync(outputDir)) {
        mkdirSync(outputDir, { recursive: true });
    }
    const config = buildConfig(opts);
    const configPath = join(outputDir, 'merge-config.yaml');
    writeFileSync(configPath, config);
    if (!hasBin('mergekit-yaml')) {
        return {
            ok: false,
            output_dir: outputDir,
            config_path: configPath,
            log: 'mergekit not installed. Install: pip install mergekit',
        };
    }
    // Paths are quoted because the default output directory lives under the
    // home directory, which may contain spaces.
    // NOTE(review): `--cuda 0` is kept as-is; confirm against the installed
    // mergekit-yaml whether `--cuda` accepts a value.
    const cmd = `mergekit-yaml ${quotePath(configPath)} ${quotePath(outputDir)} --cuda 0 --copy-tokenizer`;
    const r = shell(cmd);
    let log = r.output.split('\n').slice(-15).join('\n');
    let ollamaName;
    if (r.ok && opts.deploy && hasBin('ollama')) {
        const modelfile = [
            `FROM ${outputDir}`,
            `PARAMETER temperature 0.2`,
            `SYSTEM "Merged model: ${opts.models.map(m => m.model).join(' + ')} via ${opts.method ?? 'ties'}."`,
        ].join('\n');
        const modelfilePath = join(outputDir, 'Modelfile');
        writeFileSync(modelfilePath, modelfile);
        const created = shell(`ollama create ${quotePath(outputName)} -f ${quotePath(modelfilePath)}`);
        if (created.ok) {
            ollamaName = outputName;
        }
        else {
            // Do not advertise a model name that was never registered.
            log += '\nollama create failed:\n' + created.output.split('\n').slice(-5).join('\n');
        }
    }
    return {
        ok: r.ok,
        output_dir: outputDir,
        config_path: configPath,
        ollama_name: ollamaName,
        log,
    };
}
112
/**
 * Convenience: sensible default TIES blend for kbot.
 *
 * Blends three models into `kernel-triad-7b` on top of the Qwen2.5 coder
 * base and deploys the result.
 *
 * @returns {Promise<object>} The `MergeResult` from `mergeModels`.
 */
export async function mergeKbotDefault() {
    const baseModel = 'Qwen/Qwen2.5-Coder-7B-Instruct';
    // [model, weight, density]
    const blend = [
        [baseModel, 0.5, 0.5],
        ['deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', 0.3, 0.5],
        ['mlx-community/kernel-coder-self-latest', 0.2, 0.7],
    ];
    const models = blend.map(([model, weight, density]) => ({ model, weight, density }));
    return mergeModels({
        method: 'ties',
        baseModel,
        models,
        outputName: 'kernel-triad-7b',
        deploy: true,
    });
}
126
+ // ── MoE swap path (documentation) ────────────────────────────────────
127
+ //
128
+ // To use DeepSeek-V2-Lite-16B (2.4B active params) as the base for train-self:
129
+ // kbot train-self --base-model mlx-community/DeepSeek-V2-Lite-Chat-4bit --mode default
130
+ // Expected: fits 36GB unified, outperforms dense 7B on reasoning by ~5–8% on our bench.
131
+ //
132
+ // Or Qwen3-MoE-30B-A3B (3B active):
133
+ // kbot train-self --base-model mlx-community/Qwen3-30B-A3B-Instruct-4bit --mode default
134
+ // Larger but still viable on 36GB. Prefer this for agent-trace mode.
135
/**
 * Render a merge result as a plain-text report.
 *
 * @param {object} r - Result returned by `mergeModels`.
 * @returns {string} Newline-joined report; the Ollama line appears only when
 *   `r.ollama_name` is truthy.
 */
export function formatMergeReport(r) {
    const status = r.ok ? 'OK' : 'FAIL';
    const ollamaLine = r.ollama_name ? [` Ollama: ${r.ollama_name}`] : [];
    return [
        'train-merge',
        '─'.repeat(40),
        ` Status: ${status}`,
        ` Output dir: ${r.output_dir}`,
        ` Config: ${r.config_path}`,
        ...ollamaLine,
        '',
        'Log (tail):',
        r.log,
    ].join('\n');
}
148
+ //# sourceMappingURL=train-merge.js.map