@kernel.chat/kbot 3.97.1 → 3.98.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/agent.js +21 -0
- package/dist/cli.js +119 -0
- package/dist/teacher-logger.d.ts +71 -0
- package/dist/teacher-logger.js +162 -0
- package/dist/tools/ableton.js +176 -42
- package/dist/tools/estimation.d.ts +2 -0
- package/dist/tools/estimation.js +21 -0
- package/dist/tools/idempotency-check.d.ts +2 -0
- package/dist/tools/idempotency-check.js +31 -0
- package/dist/tools/idempotency-checker.d.ts +2 -0
- package/dist/tools/idempotency-checker.js +23 -0
- package/dist/tools/image-variation.d.ts +2 -0
- package/dist/tools/image-variation.js +31 -0
- package/dist/tools/index.js +1 -0
- package/dist/tools/one-prompt-producer.d.ts +2 -0
- package/dist/tools/one-prompt-producer.js +723 -0
- package/dist/tools/schedule-persistence.d.ts +2 -0
- package/dist/tools/schedule-persistence.js +19 -0
- package/dist/tools/sound-designer.js +278 -3
- package/dist/train-agent-trace.d.ts +29 -0
- package/dist/train-agent-trace.js +141 -0
- package/dist/train-curate.d.ts +25 -0
- package/dist/train-curate.js +354 -0
- package/dist/train-cycle.d.ts +22 -0
- package/dist/train-cycle.js +230 -0
- package/dist/train-grpo.d.ts +68 -0
- package/dist/train-grpo.js +206 -0
- package/dist/train-merge.d.ts +26 -0
- package/dist/train-merge.js +148 -0
- package/dist/train-self.d.ts +38 -0
- package/dist/train-self.js +232 -0
- package/package.json +1 -1
package/dist/train-grpo.js
ADDED
@@ -0,0 +1,206 @@
+// train-grpo — Group Relative Policy Optimization scaffolding.
+// GRPO generates N completions per prompt, scores with a verifiable reward,
+// and reinforces the best. No reward model needed — the oracle is the verifier.
+//
+// Suitable verifiers for kbot:
+// build-pass — does the emitted code compile / npm run build succeed?
+// test-pass — does `npm test` / vitest succeed on the generated change?
+// lint-pass — eslint / tsc --noEmit
+// regex-match — output contains a required pattern
+// json-valid — output parses as JSON and matches schema
+//
+// This module writes GRPO config + delegates to an external GRPO runner
+// (mlx-grpo / trl-mlx / custom). Runner selection is pluggable.
+import { existsSync, mkdirSync, writeFileSync, appendFileSync } from 'node:fs';
+import { join, resolve } from 'node:path';
+import { homedir, tmpdir } from 'node:os';
+import { execSync, spawnSync } from 'node:child_process';
+import { randomUUID } from 'node:crypto';
+function shell(cmd, cwd, timeout = 120_000) {
+    try {
+        const out = execSync(cmd, {
+            encoding: 'utf-8',
+            stdio: ['pipe', 'pipe', 'pipe'],
+            maxBuffer: 20 * 1024 * 1024,
+            timeout,
+            cwd,
+        });
+        return { ok: true, output: out.toString() };
+    }
+    catch (err) {
+        const e = err;
+        return { ok: (e.status === 0), output: [e.stdout, e.stderr, e.message].filter(Boolean).join('\n') };
+    }
+}
+async function generateRollout(model, system, prompt, temperature = 0.8) {
+    const res = await fetch('http://localhost:11434/api/chat', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+            model,
+            stream: false,
+            messages: [
+                ...(system ? [{ role: 'system', content: system }] : []),
+                { role: 'user', content: prompt },
+            ],
+            options: { num_predict: 2048, temperature },
+        }),
+        signal: AbortSignal.timeout(180_000),
+    });
+    if (!res.ok)
+        throw new Error(`Ollama HTTP ${res.status}`);
+    const data = await res.json();
+    return data.message?.content || '';
+}
+/** Apply a verifier to a completion. Returns { ok, reward ∈ [0,1] }. */
+export async function verify(v, completion) {
+    switch (v.kind) {
+        case 'regex-match': {
+            try {
+                const re = new RegExp(v.pattern, v.flags);
+                return { ok: re.test(completion), reward: re.test(completion) ? 1 : 0 };
+            }
+            catch {
+                return { ok: false, reward: 0 };
+            }
+        }
+        case 'json-valid': {
+            // Try to extract JSON from ```json blocks or bare object
+            const match = completion.match(/```json\s*([\s\S]*?)```/) || completion.match(/\{[\s\S]*\}/);
+            if (!match)
+                return { ok: false, reward: 0 };
+            try {
+                const parsed = JSON.parse(match[1] || match[0]);
+                if (v.requireKeys) {
+                    for (const k of v.requireKeys) {
+                        if (!(k in parsed))
+                            return { ok: false, reward: 0.3 };
+                    }
+                }
+                return { ok: true, reward: 1 };
+            }
+            catch {
+                return { ok: false, reward: 0 };
+            }
+        }
+        case 'build-pass':
+        case 'test-pass':
+        case 'lint-pass': {
+            // Extract code blocks from completion, write to a scratch dir, run the command
+            const scratch = join(tmpdir(), `grpo-${randomUUID()}`);
+            mkdirSync(scratch, { recursive: true });
+            const codeMatch = completion.match(/```[a-zA-Z]*\n([\s\S]*?)```/);
+            const code = codeMatch ? codeMatch[1] : completion;
+            writeFileSync(join(scratch, 'out.txt'), code);
+            const r = shell(v.cmd, v.cwd || scratch, 180_000);
+            return { ok: r.ok, reward: r.ok ? 1 : 0 };
+        }
+        case 'custom': {
+            const script = resolve(v.script);
+            if (!existsSync(script))
+                return { ok: false, reward: 0 };
+            const res = spawnSync(script, [], { input: completion, encoding: 'utf-8', timeout: 60_000 });
+            return { ok: res.status === 0, reward: res.status === 0 ? 1 : 0 };
+        }
+    }
+}
+/** Compute group-relative advantages: (reward - group_mean) / group_std */
+function advantages(rewards) {
+    if (rewards.length === 0)
+        return [];
+    const mean = rewards.reduce((a, b) => a + b, 0) / rewards.length;
+    const variance = rewards.reduce((a, b) => a + (b - mean) ** 2, 0) / rewards.length;
+    const std = Math.sqrt(variance) || 1e-6;
+    return rewards.map(r => (r - mean) / std);
+}
+export async function runGrpoRollouts(opts) {
+    const studentModel = opts.studentModel ?? 'kernel-coder:latest';
+    const groupSize = opts.groupSize ?? 8;
+    const outputDir = opts.outputDir ?? join(homedir(), '.kbot', 'teacher', 'grpo', `run-${Date.now()}`);
+    if (!existsSync(outputDir))
+        mkdirSync(outputDir, { recursive: true });
+    const rollouts = [];
+    let totalReward = 0;
+    let totalCount = 0;
+    for (const p of opts.prompts) {
+        const id = p.id ?? randomUUID();
+        const completions = [];
+        for (let i = 0; i < groupSize; i++) {
+            try {
+                const text = await generateRollout(studentModel, p.system || '', p.prompt, 0.8);
+                const v = await verify(p.verifier, text);
+                completions.push({ text, reward: v.reward, verifier_ok: v.ok });
+                totalReward += v.reward;
+                totalCount++;
+            }
+            catch (err) {
+                completions.push({ text: '', reward: 0, verifier_ok: false });
+                totalCount++;
+            }
+        }
+        const adv = advantages(completions.map(c => c.reward));
+        rollouts.push({ prompt_id: id, completions, advantage: adv });
+        // Persist rollout to JSONL (consumed by GRPO updater)
+        appendFileSync(join(outputDir, 'rollouts.jsonl'), JSON.stringify({
+            id, prompt: p.prompt, system: p.system,
+            rollouts: completions.map((c, k) => ({ text: c.text, reward: c.reward, advantage: adv[k] })),
+        }) + '\n');
+    }
+    const meanReward = totalCount > 0 ? totalReward / totalCount : 0;
+    // Write GRPO config for external runner
+    const grpoConfig = {
+        student_model: studentModel,
+        group_size: groupSize,
+        iters: opts.iters ?? 100,
+        learning_rate: opts.learningRate ?? 5e-6,
+        kl_beta: opts.klBeta ?? 0.05,
+        rollouts_path: join(outputDir, 'rollouts.jsonl'),
+        output_dir: outputDir,
+    };
+    writeFileSync(join(outputDir, 'grpo-config.json'), JSON.stringify(grpoConfig, null, 2));
+    let log = `Wrote ${rollouts.length} prompt groups × ${groupSize} rollouts to ${outputDir}`;
+    if (!opts.dryRun && opts.runnerCmd) {
+        const r = shell(`${opts.runnerCmd} --config ${join(outputDir, 'grpo-config.json')}`);
+        log += '\n' + r.output.split('\n').slice(-10).join('\n');
+    }
+    else if (!opts.dryRun) {
+        log += '\nNo --runner-cmd given. Rollouts collected; invoke an external GRPO runner on rollouts.jsonl.';
+    }
+    return {
+        ok: true,
+        output_dir: outputDir,
+        rollouts,
+        iterations_run: 0,
+        mean_reward: Math.round(meanReward * 1000) / 1000,
+        log,
+    };
+}
+/** Default verifier suite for kbot: regex + JSON validity on common code gen. */
+export const DEFAULT_VERIFIER_SUITE = [
+    {
+        prompt: 'Write a TypeScript function `fib(n: number): number` that returns the nth Fibonacci number. Return only the function, in a ```typescript code block.',
+        verifier: { kind: 'regex-match', pattern: 'function fib\\s*\\(\\s*n\\s*:\\s*number', flags: 'i' },
+    },
+    {
+        prompt: 'Return a JSON object with keys "name" (string) and "version" (string) for a hypothetical npm package called "example-tool" at version 1.0.0. Only JSON, no prose.',
+        verifier: { kind: 'json-valid', requireKeys: ['name', 'version'] },
+    },
+    {
+        prompt: 'Write a Python function `def is_prime(n: int) -> bool:` that returns True if n is prime. Return only the function in a ```python code block.',
+        verifier: { kind: 'regex-match', pattern: 'def is_prime\\s*\\(', flags: 'i' },
+    },
+];
+export function formatGrpoReport(r) {
+    return [
+        'train-grpo',
+        '─'.repeat(40),
+        ` Status: ${r.ok ? 'OK' : 'FAIL'}`,
+        ` Output dir: ${r.output_dir}`,
+        ` Prompt groups: ${r.rollouts.length}`,
+        ` Mean reward: ${r.mean_reward.toFixed(3)}`,
+        '',
+        `Log:`,
+        r.log,
+    ].join('\n');
+}
+//# sourceMappingURL=train-grpo.js.map
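For orientation, here is a minimal sketch of how the new GRPO scaffolding could be driven from Node. It assumes a local Ollama server on port 11434 (as `generateRollout` above expects) and an illustrative import path; the actual CLI wiring in `dist/cli.js` is not shown in this hunk, so treat the names outside this module as assumptions.

```js
// Hypothetical usage sketch (not part of the package diff).
// Import path is illustrative; the real export surface depends on the package's exports map.
import { runGrpoRollouts, DEFAULT_VERIFIER_SUITE, formatGrpoReport } from './dist/train-grpo.js';

const result = await runGrpoRollouts({
  studentModel: 'kernel-coder:latest', // module default shown above
  groupSize: 4,                        // completions sampled per prompt
  prompts: DEFAULT_VERIFIER_SUITE,     // regex / JSON verifiers shipped in this file
  dryRun: false,                       // no runnerCmd given, so rollouts are collected only
});

// rollouts.jsonl and grpo-config.json now sit in result.output_dir,
// ready for an external GRPO runner (mlx-grpo / trl-mlx / custom).
console.log(formatGrpoReport(result));
```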
package/dist/train-merge.d.ts
ADDED
@@ -0,0 +1,26 @@
+export type MergeMethod = 'ties' | 'slerp' | 'dare_ties' | 'linear' | 'passthrough';
+export interface MergeOptions {
+    method?: MergeMethod;
+    baseModel: string;
+    models: Array<{
+        model: string;
+        weight?: number;
+        density?: number;
+    }>;
+    outputName?: string;
+    outputDir?: string;
+    dtype?: 'float16' | 'bfloat16' | 'float32';
+    deploy?: boolean;
+}
+export interface MergeResult {
+    ok: boolean;
+    output_dir: string;
+    config_path: string;
+    ollama_name?: string;
+    log: string;
+}
+export declare function mergeModels(opts: MergeOptions): Promise<MergeResult>;
+/** Convenience: sensible default TIES blend for kbot. */
+export declare function mergeKbotDefault(): Promise<MergeResult>;
+export declare function formatMergeReport(r: MergeResult): string;
+//# sourceMappingURL=train-merge.d.ts.map
package/dist/train-merge.js
ADDED
@@ -0,0 +1,148 @@
+// train-merge — model merging via MergeKit (TIES / SLERP / DARE).
+// Also documents the MoE swap path (DeepSeek-V2-Lite-16B, Qwen3-MoE).
+//
+// MergeKit must be installed: pip install mergekit
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+import { execSync } from 'node:child_process';
+function hasBin(bin) {
+    try {
+        execSync(`which ${bin}`, { stdio: 'ignore' });
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+function shell(cmd) {
+    try {
+        const out = execSync(cmd, {
+            encoding: 'utf-8',
+            stdio: ['pipe', 'pipe', 'pipe'],
+            maxBuffer: 100 * 1024 * 1024,
+            timeout: 60 * 60 * 1000,
+        });
+        return { ok: true, output: out.toString() };
+    }
+    catch (err) {
+        const e = err;
+        return { ok: false, output: [e.stdout, e.stderr, e.message].filter(Boolean).join('\n') };
+    }
+}
+/** Generate a MergeKit YAML config. */
+function buildConfig(opts) {
+    const method = opts.method ?? 'ties';
+    const dtype = opts.dtype ?? 'bfloat16';
+    const modelsYaml = opts.models.map(m => {
+        const params = [];
+        if (m.weight != null)
+            params.push(`weight: ${m.weight}`);
+        if (m.density != null)
+            params.push(`density: ${m.density}`);
+        const paramBlock = params.length > 0 ? `\n    parameters:\n      ${params.join('\n      ')}` : '';
+        return `  - model: ${m.model}${paramBlock}`;
+    }).join('\n');
+    if (method === 'slerp') {
+        // SLERP requires exactly 2 models and uses 't' parameter
+        return [
+            `slices:`,
+            `  - sources:`,
+            ...opts.models.map((m) => `      - model: ${m.model}\n        layer_range: [0, 32]`),
+            `merge_method: slerp`,
+            `base_model: ${opts.baseModel}`,
+            `parameters:`,
+            `  t:`,
+            `    - filter: self_attn`,
+            `      value: [0, 0.5, 0.3, 0.7, 1]`,
+            `    - filter: mlp`,
+            `      value: [1, 0.5, 0.7, 0.3, 0]`,
+            `    - value: 0.5`,
+            `dtype: ${dtype}`,
+        ].join('\n');
+    }
+    return [
+        `models:`,
+        modelsYaml,
+        `merge_method: ${method}`,
+        `base_model: ${opts.baseModel}`,
+        `parameters:`,
+        `  normalize: true`,
+        `dtype: ${dtype}`,
+    ].join('\n');
+}
+export async function mergeModels(opts) {
+    const outputName = opts.outputName ?? `kernel-merged-${Date.now()}`;
+    const outputDir = opts.outputDir ?? join(homedir(), '.kbot', 'teacher', 'merges', outputName);
+    if (!existsSync(outputDir))
+        mkdirSync(outputDir, { recursive: true });
+    const config = buildConfig(opts);
+    const configPath = join(outputDir, 'merge-config.yaml');
+    writeFileSync(configPath, config);
+    if (!hasBin('mergekit-yaml')) {
+        return {
+            ok: false,
+            output_dir: outputDir,
+            config_path: configPath,
+            log: 'mergekit not installed. Install: pip install mergekit',
+        };
+    }
+    const cmd = `mergekit-yaml ${configPath} ${outputDir} --cuda 0 --copy-tokenizer`;
+    const r = shell(cmd);
+    let ollamaName;
+    if (r.ok && opts.deploy && hasBin('ollama')) {
+        const modelfile = [
+            `FROM ${outputDir}`,
+            `PARAMETER temperature 0.2`,
+            `SYSTEM "Merged model: ${opts.models.map(m => m.model).join(' + ')} via ${opts.method ?? 'ties'}."`,
+        ].join('\n');
+        const modelfilePath = join(outputDir, 'Modelfile');
+        writeFileSync(modelfilePath, modelfile);
+        ollamaName = outputName;
+        shell(`ollama create ${ollamaName} -f ${modelfilePath}`);
+    }
+    return {
+        ok: r.ok,
+        output_dir: outputDir,
+        config_path: configPath,
+        ollama_name: ollamaName,
+        log: r.output.split('\n').slice(-15).join('\n'),
+    };
+}
+/** Convenience: sensible default TIES blend for kbot. */
+export async function mergeKbotDefault() {
+    return mergeModels({
+        method: 'ties',
+        baseModel: 'Qwen/Qwen2.5-Coder-7B-Instruct',
+        models: [
+            { model: 'Qwen/Qwen2.5-Coder-7B-Instruct', weight: 0.5, density: 0.5 },
+            { model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', weight: 0.3, density: 0.5 },
+            { model: 'mlx-community/kernel-coder-self-latest', weight: 0.2, density: 0.7 },
+        ],
+        outputName: 'kernel-triad-7b',
+        deploy: true,
+    });
+}
+// ── MoE swap path (documentation) ────────────────────────────────────
+//
+// To use DeepSeek-V2-Lite-16B (2.4B active params) as the base for train-self:
+// kbot train-self --base-model mlx-community/DeepSeek-V2-Lite-Chat-4bit --mode default
+// Expected: fits 36GB unified, outperforms dense 7B on reasoning by ~5–8% on our bench.
+//
+// Or Qwen3-MoE-30B-A3B (3B active):
+// kbot train-self --base-model mlx-community/Qwen3-30B-A3B-Instruct-4bit --mode default
+// Larger but still viable on 36GB. Prefer this for agent-trace mode.
+export function formatMergeReport(r) {
+    const lines = [
+        'train-merge',
+        '─'.repeat(40),
+        ` Status: ${r.ok ? 'OK' : 'FAIL'}`,
+        ` Output dir: ${r.output_dir}`,
+        ` Config: ${r.config_path}`,
+    ];
+    if (r.ollama_name)
+        lines.push(` Ollama: ${r.ollama_name}`);
+    lines.push('', 'Log (tail):', r.log);
+    return lines.join('\n');
+}
+//# sourceMappingURL=train-merge.js.map
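A short sketch of how the merge module could be called with `deploy: false` so the generated MergeKit config can be inspected before committing to a long merge. Model names and weights below are borrowed from `mergeKbotDefault`; the import path and output name are assumptions, and the call expects `mergekit-yaml` on PATH (otherwise it returns early with an install hint).

```js
// Hypothetical usage sketch (not part of the package diff).
import { mergeModels, formatMergeReport } from './dist/train-merge.js'; // path illustrative

const r = await mergeModels({
  method: 'ties',                                  // TIES merge; emitted YAML sets normalize: true
  baseModel: 'Qwen/Qwen2.5-Coder-7B-Instruct',
  models: [
    { model: 'Qwen/Qwen2.5-Coder-7B-Instruct', weight: 0.5, density: 0.5 },
    { model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', weight: 0.3, density: 0.5 },
  ],
  outputName: 'example-ties-merge',                // hypothetical name
  deploy: false,                                   // skip the `ollama create` step
});

// merge-config.yaml is written to r.output_dir before mergekit-yaml runs,
// so the config survives for inspection even if the merge itself fails.
console.log(formatMergeReport(r));
```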
package/dist/train-self.d.ts
ADDED
@@ -0,0 +1,38 @@
+import { type CurateMode } from './train-curate.js';
+export interface TrainSelfOptions {
+    mode?: CurateMode;
+    baseModel?: string;
+    outputName?: string;
+    backend?: 'mlx' | 'unsloth' | 'llama-cpp' | 'together';
+    dryRun?: boolean;
+    skipCurate?: boolean;
+    skipTrain?: boolean;
+    skipDeploy?: boolean;
+    iters?: number;
+    batchSize?: number;
+    numLayers?: number;
+    learningRate?: number;
+    maxExamples?: number;
+    datasetPath?: string;
+    adapterPath?: string;
+    fusedPath?: string;
+    ggufPath?: string;
+    gradCheckpoint?: boolean;
+}
+interface StepResult {
+    step: string;
+    ok: boolean;
+    duration_ms: number;
+    details?: string;
+}
+export declare function trainSelf(opts?: TrainSelfOptions): Promise<{
+    results: StepResult[];
+    summary: string;
+}>;
+/** CLI-facing: pretty-print a run. */
+export declare function formatTrainSelfReport(r: {
+    results: StepResult[];
+    summary: string;
+}): string;
+export {};
+//# sourceMappingURL=train-self.d.ts.map
package/dist/train-self.js
ADDED
@@ -0,0 +1,232 @@
+// train-self — one command to mine local corpus, fine-tune, deploy as Ollama model.
+//
+// Pipeline:
+// 1. curate — score/filter traces from ~/.kbot/teacher/ + observer/
+// 2. prepare — convert to training format (already OpenAI JSONL, mostly a validator pass)
+// 3. train — launch mlx_lm.lora (or chosen backend)
+// 4. fuse — merge LoRA adapter into base
+// 5. convert — MLX → GGUF
+// 6. deploy — register as Ollama model
+//
+// Presets:
+// default — general-purpose LoRA on whole corpus
+// reasoning — s1-style distill on thinking traces
+// agent-trace — tool-use specialization (Phase 4)
+// code-only — code-heavy filter
+//
+// Each stage is individually re-runnable via flags.
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { homedir } from 'node:os';
+import { execSync } from 'node:child_process';
+import { curate, formatCurateReport } from './train-curate.js';
+/** MLX expects a directory with train.jsonl / valid.jsonl / test.jsonl. Split one file into that shape. */
+function splitForMlx(datasetFile) {
+    const lines = readFileSync(datasetFile, 'utf-8').split('\n').filter(l => l.trim());
+    const dir = join(dirname(datasetFile), 'mlx-split');
+    if (!existsSync(dir))
+        mkdirSync(dir, { recursive: true });
+    // Shuffle deterministically (rotate) so rerun is stable given same input
+    const shuffled = [...lines];
+    // 80 / 10 / 10
+    const nValid = Math.max(1, Math.floor(shuffled.length * 0.1));
+    const nTest = Math.max(1, Math.floor(shuffled.length * 0.1));
+    const valid = shuffled.slice(0, nValid);
+    const test = shuffled.slice(nValid, nValid + nTest);
+    const train = shuffled.slice(nValid + nTest);
+    writeFileSync(join(dir, 'train.jsonl'), train.join('\n') + '\n');
+    writeFileSync(join(dir, 'valid.jsonl'), valid.join('\n') + '\n');
+    writeFileSync(join(dir, 'test.jsonl'), test.join('\n') + '\n');
+    return dir;
+}
+const DEFAULT_BASES = {
+    'default': 'mlx-community/Qwen2.5-Coder-7B-Instruct-4bit',
+    'reasoning': 'mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit',
+    'agent-trace': 'mlx-community/Qwen2.5-Coder-7B-Instruct-4bit',
+    'code-only': 'mlx-community/Qwen2.5-Coder-14B-Instruct-4bit',
+};
+function shell(cmd, cwd) {
+    try {
+        const output = execSync(cmd, {
+            encoding: 'utf-8',
+            stdio: ['pipe', 'pipe', 'pipe'],
+            maxBuffer: 50 * 1024 * 1024,
+            timeout: 4 * 60 * 60 * 1000, // 4h cap per step
+            cwd,
+        });
+        return { ok: true, output: output.toString() };
+    }
+    catch (err) {
+        const e = err;
+        return { ok: false, output: [e.stdout, e.stderr, e.message].filter(Boolean).join('\n') };
+    }
+}
+function hasBin(bin) {
+    try {
+        execSync(`which ${bin}`, { stdio: 'ignore' });
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+export async function trainSelf(opts = {}) {
+    const mode = opts.mode ?? 'default';
+    const backend = opts.backend ?? 'mlx';
+    const baseModel = opts.baseModel ?? DEFAULT_BASES[mode];
+    const timestamp = Date.now();
+    const outputName = opts.outputName ?? `kernel-${mode === 'default' ? 'self' : mode}:v${timestamp}`;
+    const workDir = join(homedir(), '.kbot', 'teacher', 'runs', `${mode}-${timestamp}`);
+    if (!existsSync(workDir))
+        mkdirSync(workDir, { recursive: true });
+    const datasetPath = opts.datasetPath ?? join(workDir, 'dataset.jsonl');
+    const adapterPath = opts.adapterPath ?? join(workDir, 'adapters');
+    const fusedPath = opts.fusedPath ?? join(workDir, 'fused');
+    const ggufPath = opts.ggufPath ?? join(workDir, `${outputName.replace(/:/g, '-')}.gguf`);
+    const results = [];
+    const log = (step, ok, duration, details) => results.push({ step, ok, duration_ms: duration, details });
+    // ── Stage 1: Curate ─────────────────────────────────────────────
+    if (!opts.skipCurate) {
+        const t0 = Date.now();
+        try {
+            const r = curate({
+                mode,
+                output: datasetPath,
+                maxExamples: opts.maxExamples ?? (mode === 'reasoning' ? 1500 : 3000),
+            });
+            log('curate', r.kept > 0, Date.now() - t0, formatCurateReport(r));
+            if (r.kept === 0) {
+                return {
+                    results,
+                    summary: `No examples passed the curator. Seed ~/.kbot/teacher/traces.jsonl by using kbot normally, then retry.`,
+                };
+            }
+        }
+        catch (err) {
+            log('curate', false, Date.now() - t0, err instanceof Error ? err.message : String(err));
+            return { results, summary: 'Curate failed. See results.' };
+        }
+    }
+    if (opts.dryRun) {
+        return { results, summary: `Dry run. Dataset at ${datasetPath}. Would train ${baseModel} → ${outputName}.` };
+    }
+    // ── Stage 2: Train ──────────────────────────────────────────────
+    if (!opts.skipTrain) {
+        if (backend === 'mlx' && !hasBin('mlx_lm.lora')) {
+            log('train', false, 0, 'mlx_lm.lora not found. Install: pip install mlx-lm');
+            return { results, summary: 'MLX not installed.' };
+        }
+        const t0 = Date.now();
+        if (backend === 'mlx') {
+            const iters = opts.iters ?? (mode === 'reasoning' ? 1500 : 1000);
+            const batch = opts.batchSize ?? 1;
+            const layers = opts.numLayers ?? 8;
+            const lr = opts.learningRate ?? 1e-5;
+            const grad = opts.gradCheckpoint !== false ? '--grad-checkpoint' : '';
+            // MLX expects a directory with train/valid/test.jsonl
+            const dataDir = splitForMlx(datasetPath);
+            const cmd = [
+                'mlx_lm.lora',
+                '--model', baseModel,
+                '--train',
+                '--data', dataDir,
+                '--batch-size', String(batch),
+                '--num-layers', String(layers),
+                '--iters', String(iters),
+                '--learning-rate', String(lr),
+                '--adapter-path', adapterPath,
+                grad,
+            ].filter(Boolean).join(' ');
+            const r = shell(cmd);
+            log('train', r.ok, Date.now() - t0, r.output.split('\n').slice(-15).join('\n'));
+            if (!r.ok)
+                return { results, summary: 'Training failed. See log.' };
+        }
+        else if (backend === 'together') {
+            // Cloud fallback — delegate to existing train_start tool expectations
+            log('train', false, Date.now() - t0, 'Cloud backend: use `kbot train_start --backend together` directly; train-self cloud flow not yet implemented.');
+            return { results, summary: 'Cloud backend not wired in train-self yet.' };
+        }
+        else {
+            log('train', false, Date.now() - t0, `Backend ${backend} not yet wired; use mlx.`);
+            return { results, summary: `Backend ${backend} not supported in train-self yet.` };
+        }
+    }
+    // ── Stage 3: Fuse adapter ───────────────────────────────────────
+    if (!opts.skipTrain && hasBin('mlx_lm.fuse')) {
+        const t0 = Date.now();
+        const cmd = [
+            'mlx_lm.fuse',
+            '--model', baseModel,
+            '--adapter-path', adapterPath,
+            '--save-path', fusedPath,
+        ].join(' ');
+        const r = shell(cmd);
+        log('fuse', r.ok, Date.now() - t0, r.output.split('\n').slice(-8).join('\n'));
+        if (!r.ok)
+            return { results, summary: 'Fuse failed.' };
+    }
+    // ── Stage 4: Convert to GGUF (for Ollama) ───────────────────────
+    if (!opts.skipDeploy) {
+        const t0 = Date.now();
+        // Preferred: llama.cpp convert script
+        if (hasBin('python3')) {
+            const convertCmd = `python3 -m mlx_lm.convert --hf-path ${fusedPath} --quantize --q-bits 4 --mlx-path ${fusedPath}-mlx4`;
+            const r = shell(convertCmd);
+            log('quantize', r.ok, Date.now() - t0, r.output.split('\n').slice(-8).join('\n'));
+        }
+        else {
+            log('quantize', false, Date.now() - t0, 'python3 not available');
+        }
+    }
+    // ── Stage 5: Deploy to Ollama ───────────────────────────────────
+    if (!opts.skipDeploy && hasBin('ollama')) {
+        const t0 = Date.now();
+        // Write a Modelfile that points Ollama at the fused weights.
+        // For a first pass we use the GGUF path if it exists, else the fused dir.
+        const modelfilePath = join(workDir, 'Modelfile');
+        const fromPath = existsSync(ggufPath) ? ggufPath : fusedPath;
+        const modelfile = [
+            `FROM ${fromPath}`,
+            `PARAMETER temperature 0.2`,
+            `PARAMETER top_p 0.9`,
+            `SYSTEM "You are kbot's self-trained assistant (${mode} mode). You were fine-tuned on the operator's own agent sessions."`,
+        ].join('\n');
+        try {
+            require('node:fs').writeFileSync(modelfilePath, modelfile);
+            const cmd = `ollama create ${outputName} -f ${modelfilePath}`;
+            const r = shell(cmd);
+            log('deploy', r.ok, Date.now() - t0, r.output.split('\n').slice(-8).join('\n'));
+            if (!r.ok)
+                return { results, summary: 'Deploy failed.' };
+        }
+        catch (err) {
+            log('deploy', false, Date.now() - t0, err instanceof Error ? err.message : String(err));
+            return { results, summary: 'Deploy failed.' };
+        }
+    }
+    const allOk = results.every(r => r.ok);
+    return {
+        results,
+        summary: allOk
+            ? `Success. Model registered as Ollama: ${outputName}. Test with: kbot local && kbot --model ${outputName}`
+            : `Partial success. ${results.filter(r => r.ok).length}/${results.length} steps passed.`,
+    };
+}
+/** CLI-facing: pretty-print a run. */
+export function formatTrainSelfReport(r) {
+    const lines = ['train-self', '─'.repeat(50)];
+    for (const step of r.results) {
+        const icon = step.ok ? 'ok' : 'FAIL';
+        const ms = `${(step.duration_ms / 1000).toFixed(1)}s`;
+        lines.push(` [${icon.padStart(4)}] ${step.step.padEnd(12)} ${ms.padStart(8)}`);
+        if (step.details) {
+            for (const d of step.details.split('\n').slice(0, 6)) {
+                lines.push(` ${d}`);
+            }
+        }
+    }
+    lines.push('', r.summary);
+    return lines.join('\n');
+}
+//# sourceMappingURL=train-self.js.map
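For context, a minimal sketch of driving the new train-self pipeline programmatically. Options mirror TrainSelfOptions above; a dry run stops after the curate stage, so nothing heavier than dataset.jsonl is produced. The import path and the commented output name are assumptions, and a full run requires `pip install mlx-lm` plus an Ollama install.

```js
// Hypothetical usage sketch (not part of the package diff).
import { trainSelf, formatTrainSelfReport } from './dist/train-self.js'; // path illustrative

// Dry run: curate only, then report what a full run would train.
const dry = await trainSelf({ mode: 'default', dryRun: true });
console.log(formatTrainSelfReport(dry));

// Full local run (curate -> mlx_lm.lora -> fuse -> quantize -> ollama create):
// const run = await trainSelf({
//   mode: 'reasoning',                  // defaults to the DeepSeek-R1-Distill base above
//   iters: 1500,
//   batchSize: 1,
//   outputName: 'kernel-reasoning:v1',  // hypothetical tag
// });
// console.log(formatTrainSelfReport(run));
```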
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@kernel.chat/kbot",
-  "version": "3.97.1",
+  "version": "3.98.0",
   "description": "Open-source terminal AI agent. 787+ tools, 35 agents, 20 providers. Dreams, learns, watches your system. Controls your phone. Fully local, fully sovereign. MIT.",
   "type": "module",
   "repository": {