homunculus-code 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +56 -0
- package/LICENSE +21 -0
- package/README.md +443 -0
- package/bin/init.js +317 -0
- package/commands/eval-skill.md +48 -0
- package/commands/evolve.md +67 -0
- package/commands/improve-skill.md +50 -0
- package/core/evaluate-session.js +173 -0
- package/core/observe.sh +51 -0
- package/core/prune-instincts.js +159 -0
- package/docs/nightly-agent.md +130 -0
- package/examples/reference/README.md +47 -0
- package/examples/reference/architecture.yaml +886 -0
- package/examples/reference/evolved-agents/assistant-explorer.md +86 -0
- package/examples/reference/evolved-agents/shell-debugger.md +108 -0
- package/examples/reference/evolved-agents/tdd-runner.md +112 -0
- package/examples/reference/evolved-evals/api-system-diagnosis.eval.yaml +125 -0
- package/examples/reference/evolved-evals/assistant-system-management.eval.yaml +123 -0
- package/examples/reference/evolved-evals/claude-code-reference.eval.yaml +394 -0
- package/examples/reference/evolved-evals/development-verification-patterns.eval.yaml +117 -0
- package/examples/reference/evolved-evals/multi-agent-design-patterns.eval.yaml +151 -0
- package/examples/reference/evolved-evals/shell-automation-patterns.eval.yaml +209 -0
- package/examples/reference/evolved-evals/tdd-workflow.eval.yaml +191 -0
- package/examples/reference/evolved-evals/workflows.eval.yaml +148 -0
- package/examples/reference/evolved-skills/api-system-diagnosis.md +234 -0
- package/examples/reference/evolved-skills/assistant-system-management.md +199 -0
- package/examples/reference/evolved-skills/development-verification-patterns.md +243 -0
- package/examples/reference/evolved-skills/multi-agent-design-patterns.md +259 -0
- package/examples/reference/evolved-skills/shell-automation-patterns.md +347 -0
- package/examples/reference/evolved-skills/tdd-workflow.md +272 -0
- package/examples/reference/evolved-skills/workflows.md +237 -0
- package/package.json +25 -0
- package/templates/CLAUDE.md.template +36 -0
- package/templates/architecture.template.yaml +41 -0
- package/templates/rules/evolution-system.md +29 -0
package/bin/init.js
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// homunculus init — Set up a self-evolving AI assistant in your project
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const readline = require('readline');
|
|
7
|
+
|
|
8
|
+
// Directories shipped with the package; this script lives in <package>/bin,
// so each one is resolved one level above __dirname.
const PACKAGE_ROOT = path.join(__dirname, '..');
const TEMPLATES_DIR = path.join(PACKAGE_ROOT, 'templates');
const CORE_DIR = path.join(PACKAGE_ROOT, 'core');
const COMMANDS_DIR = path.join(PACKAGE_ROOT, 'commands');

// Non-interactive mode is enabled by either the long or the short flag.
const YES_MODE = ['--yes', '-y'].some(flag => process.argv.includes(flag));

// The readline interface is only created when prompts will actually be shown;
// in YES_MODE it stays undefined.
const rl = YES_MODE
  ? undefined
  : readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
21
|
+
|
|
22
|
+
/**
 * Ask the user a question and resolve with the trimmed answer.
 *
 * In YES_MODE no prompt is shown: the answer is read from the environment
 * variable `HOMUNCULUS_<QUESTION>` (every non-letter of the question replaced
 * by `_`, upper-cased), falling back to `defaultVal`.
 *
 * In interactive mode an empty answer yields `defaultVal`. If the input
 * stream is closed before an answer arrives (piped input, Ctrl+D), the
 * default is used as well instead of leaving the promise pending forever.
 *
 * @param {string} question - Prompt text (also the source of the env var name).
 * @param {string} [defaultVal] - Value used when no answer is given.
 * @returns {Promise<string>} The answer, the default, or '' when neither exists.
 */
function ask(question, defaultVal) {
  const fallback = defaultVal || '';

  if (YES_MODE) {
    const envKey = `HOMUNCULUS_${question.replace(/[^A-Z]/gi, '_').toUpperCase()}`;
    return Promise.resolve(process.env[envKey] || fallback);
  }

  return new Promise(resolve => {
    // Without this, an EOF on stdin would leave the promise unsettled and the
    // process would exit silently half-way through the setup.
    const onClose = () => resolve(fallback);
    rl.once('close', onClose);

    const suffix = defaultVal ? ` (${defaultVal})` : '';
    try {
      rl.question(`${question}${suffix}: `, answer => {
        rl.removeListener('close', onClose);
        resolve(answer.trim() || fallback);
      });
    } catch {
      // Newer Node versions throw when question() is called on an interface
      // that is already closed; treat that like "no answer".
      rl.removeListener('close', onClose);
      resolve(fallback);
    }
  });
}
|
|
31
|
+
|
|
32
|
+
/**
 * Create `dir` (and any missing parents) if it does not exist yet.
 *
 * `recursive: true` already makes mkdirSync a no-op for an existing
 * directory, so no separate existence check is needed. Dropping the check
 * removes the check-then-create race, and a path that exists as a regular
 * file now fails here rather than at a later write.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
|
|
35
|
+
|
|
36
|
+
/**
 * Copy a single file when the source exists.
 *
 * A missing source is tolerated (nothing is copied), but the outcome is now
 * reported so callers can avoid announcing a file that was never created.
 *
 * @param {string} src - Source file path.
 * @param {string} dest - Destination file path (overwritten if present).
 * @returns {boolean} true if the file was copied, false if `src` is missing.
 */
function copyFile(src, dest) {
  if (!fs.existsSync(src)) return false;
  fs.copyFileSync(src, dest);
  return true;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Recursively copy the contents of `src` into `dest`.
 *
 * `dest` is always created, even when `src` does not exist. Files that are
 * already present in `dest` are left untouched, so re-running the installer
 * does not clobber scripts or commands the user has edited (the installer
 * announces that existing files are skipped on a re-run).
 *
 * @param {string} src - Source directory; a missing directory copies nothing.
 * @param {string} dest - Destination directory.
 */
function copyDir(src, dest) {
  fs.mkdirSync(dest, { recursive: true });
  if (!fs.existsSync(src)) return;

  for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
    const from = path.join(src, entry.name);
    const to = path.join(dest, entry.name);

    if (entry.isDirectory()) {
      copyDir(from, to);
      continue;
    }

    try {
      // COPYFILE_EXCL makes the copy fail atomically when `to` already exists.
      fs.copyFileSync(from, to, fs.constants.COPYFILE_EXCL);
    } catch (err) {
      if (err.code !== 'EEXIST') throw err;
    }
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Substitute every `{{KEY}}` placeholder in `content` with its value.
 *
 * Placeholders are replaced literally (split/join) rather than through a
 * RegExp, so a value containing `$&`, `$1` or `$$` is inserted verbatim and
 * a key containing regex metacharacters cannot break the match.
 *
 * @param {string} content - Template text.
 * @param {Object<string, string>} vars - Placeholder name → replacement value.
 * @returns {string} The template with all known placeholders filled in.
 */
function replaceTemplateVars(content, vars) {
  let result = content;
  for (const [key, value] of Object.entries(vars)) {
    result = result.split(`{{${key}}}`).join(String(value));
  }
  return result;
}
|
|
63
|
+
|
|
64
|
+
// Built-in goal subtrees, keyed by the `name` of a selectable goal option.
const GOAL_TEMPLATES = {
  code_quality: {
    purpose: 'Ship fewer bugs, write more maintainable code',
    goals: {
      testing: { purpose: 'Every change has tests', realized_by: '# will evolve' },
      review: { purpose: 'Catch issues before merge', realized_by: '# will evolve' }
    },
    metrics: [{ name: 'test_pass_rate', healthy: '> 90%' }]
  },
  productivity: {
    purpose: 'Complete tasks faster with fewer iterations',
    goals: {
      task_completion: { purpose: 'Finish tasks in fewer cycles', realized_by: '# will evolve' },
      tool_mastery: { purpose: 'Use the right tool on first try', realized_by: '# will evolve' }
    },
    metrics: [{ name: 'avg_iterations_per_task', healthy: 'decreasing trend' }]
  },
  debugging: {
    purpose: 'Find and fix bugs faster',
    goals: {
      root_cause: { purpose: 'Identify root causes, not symptoms', realized_by: '# will evolve' },
      diagnosis_tools: { purpose: 'Use the right debugging approach', realized_by: '# will evolve' }
    },
    metrics: [{ name: 'avg_debug_time', healthy: 'decreasing trend' }]
  },
  documentation: {
    purpose: 'Keep documentation accurate and up to date',
    goals: {
      api_docs: { purpose: 'API docs match implementation', realized_by: '# will evolve' },
      decision_records: { purpose: 'Document why, not just what', realized_by: '# will evolve' }
    },
    metrics: [{ name: 'doc_freshness', healthy: '< 1 week behind code' }]
  },
  automation: {
    purpose: 'Automate repetitive work',
    goals: {
      ci_cd: { purpose: 'Automated build, test, deploy', realized_by: '# will evolve' },
      workflows: { purpose: 'Common sequences as one command', realized_by: '# will evolve' }
    },
    metrics: [{ name: 'manual_steps_per_deploy', healthy: '< 3' }]
  },
  learning: {
    purpose: 'Stay up to date with tools and best practices',
    goals: {
      tool_updates: { purpose: 'Track and adopt useful updates', realized_by: '# will evolve' },
      pattern_discovery: { purpose: 'Find better ways to do things', realized_by: '# will evolve' }
    },
    metrics: [{ name: 'patterns_adopted_per_month', healthy: '> 2' }]
  }
};

// Render a value as a YAML double-quoted scalar. JSON string syntax is valid
// YAML, so quotes, backslashes and newlines in user input are escaped safely.
function yamlQuote(value) {
  return JSON.stringify(String(value));
}

/**
 * Build the text of architecture.yaml for the selected goals.
 *
 * @param {Array<{name: string}>} goals - Selected goal options; names without
 *   a built-in template are skipped.
 * @param {string} rootPurpose - User-supplied root purpose (escaped for YAML).
 * @returns {string} YAML document text ending with a newline.
 */
function generateArchitecture(goals, rootPurpose) {
  const lines = [
    '# architecture.yaml — Your goal tree',
    '# Goals are stable. Implementations evolve.',
    '# See: https://github.com/JavanC/Homunculus',
    '',
    'version: "1.0"',
    '',
    'root:',
    `  purpose: ${yamlQuote(rootPurpose)}`,
    '',
    '  goals:'
  ];

  for (const goal of goals) {
    const tmpl = GOAL_TEMPLATES[goal.name];
    if (!tmpl) continue;

    lines.push(`    ${goal.name}:`);
    lines.push(`      purpose: ${yamlQuote(tmpl.purpose)}`);
    if (tmpl.metrics) {
      lines.push('      metrics:');
      for (const m of tmpl.metrics) {
        lines.push(`        - name: ${m.name}`);
        lines.push(`          healthy: ${yamlQuote(m.healthy)}`);
      }
    }
    if (tmpl.goals) {
      lines.push('      goals:');
      for (const [subName, sub] of Object.entries(tmpl.goals)) {
        lines.push(`        ${subName}:`);
        lines.push(`          purpose: ${yamlQuote(sub.purpose)}`);
        // realized_by is intentionally left empty; the value is a YAML comment.
        lines.push(`          realized_by: ${sub.realized_by}`);
      }
    }
    lines.push('');
  }

  lines.push('    # Add more goals as your system evolves...');
  return lines.join('\n') + '\n';
}

/**
 * Register the PostToolUse observation hook in .claude/settings.json.
 *
 * Existing settings are preserved. If the file exists but cannot be parsed it
 * is left untouched (overwriting it would destroy the user's configuration).
 * Nothing is written when a PostToolUse hook is already configured.
 *
 * @param {string} projectDir - Project root directory.
 */
function configureObservationHook(projectDir) {
  const settingsPath = path.join(projectDir, '.claude', 'settings.json');
  let settings = {};

  if (fs.existsSync(settingsPath)) {
    try {
      settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
    } catch (err) {
      console.log(` \x1b[33m!\x1b[0m .claude/settings.json is not valid JSON (${err.message}); hook not configured`);
      return;
    }
    if (settings === null || typeof settings !== 'object' || Array.isArray(settings)) {
      console.log(' \x1b[33m!\x1b[0m .claude/settings.json is not a JSON object; hook not configured');
      return;
    }
  }

  if (!settings.hooks) settings.hooks = {};
  if (settings.hooks.PostToolUse) return;

  // Claude Code expects each event entry to be a matcher group that wraps the
  // actual command hooks.
  settings.hooks.PostToolUse = [{
    matcher: '*',
    hooks: [{
      type: 'command',
      command: 'bash scripts/observe.sh post'
    }]
  }];
  fs.mkdirSync(path.join(projectDir, '.claude'), { recursive: true });
  fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n');
  console.log(' \x1b[32m✓\x1b[0m Configured Claude Code observation hook');
}

/**
 * Interactive entry point: asks for project details and scaffolds the
 * Homunculus files in the current working directory. Files that already
 * exist (architecture.yaml, CLAUDE.md, the evolution rules) are skipped.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('');
  console.log(' \x1b[1mHomunculus\x1b[0m — Self-evolving AI Assistant');
  console.log(' A seed that grows into your own AI assistant.');
  console.log('');

  const projectDir = process.cwd();

  // Check if already initialized
  if (fs.existsSync(path.join(projectDir, 'homunculus'))) {
    console.log(' \x1b[33m!\x1b[0m homunculus/ already exists. Re-running will skip existing files.');
    console.log('');
  }

  // Gather info
  const projectName = await ask(' Project name', path.basename(projectDir));
  const purpose = await ask(' What is this project\'s main goal?', 'My evolving AI assistant');

  console.log('');
  console.log(' Select areas you want your AI to improve in:');
  console.log('');

  const goalOptions = [
    { key: '1', name: 'code_quality', label: 'Code Quality', desc: 'Ship fewer bugs, better tests' },
    { key: '2', name: 'productivity', label: 'Productivity', desc: 'Complete tasks faster' },
    { key: '3', name: 'debugging', label: 'Debugging', desc: 'Faster root cause analysis' },
    { key: '4', name: 'documentation', label: 'Documentation', desc: 'Keep docs up to date' },
    { key: '5', name: 'automation', label: 'Automation', desc: 'Automate repetitive work' },
    { key: '6', name: 'learning', label: 'Continuous Learning', desc: 'Stay up to date with tools and patterns' },
  ];

  for (const opt of goalOptions) {
    console.log(` ${opt.key}. ${opt.label} — ${opt.desc}`);
  }
  console.log('');

  const selectedStr = await ask(' Select areas (enter numbers, e.g. 1,2,5)', '1,2');
  const selectedKeys = selectedStr.split(/[,\s]+/).map(s => s.trim()).filter(Boolean);
  const selectedGoals = goalOptions.filter(o => selectedKeys.includes(o.key));
  // Nothing valid selected: fall back to the first two areas.
  if (selectedGoals.length === 0) selectedGoals.push(goalOptions[0], goalOptions[1]);

  console.log('');

  const vars = {
    PROJECT_NAME: projectName,
    PROJECT_PURPOSE: purpose
  };

  // 1. Create directory structure
  const dirs = [
    'homunculus/instincts/personal',
    'homunculus/instincts/archived',
    'homunculus/evolved/skills',
    'homunculus/evolved/agents',
    'homunculus/evolved/evals',
    'homunculus/experiments',
    'scripts',
    '.claude/rules',
    '.claude/commands'
  ];

  for (const dir of dirs) {
    ensureDir(path.join(projectDir, dir));
  }
  console.log(' \x1b[32m✓\x1b[0m Created homunculus/ directory structure');

  // 2. Generate architecture.yaml from selected goals
  const archDest = path.join(projectDir, 'architecture.yaml');
  if (!fs.existsSync(archDest)) {
    const archContent = generateArchitecture(selectedGoals, purpose);
    fs.writeFileSync(archDest, archContent);
    console.log(` \x1b[32m✓\x1b[0m Created architecture.yaml with ${selectedGoals.length} goals: ${selectedGoals.map(g => g.label).join(', ')}`);
  } else {
    console.log(' \x1b[33m-\x1b[0m architecture.yaml already exists, skipping');
  }

  // 3. Create CLAUDE.md from the template (skipped if one already exists)
  const claudeDest = path.join(projectDir, 'CLAUDE.md');
  if (!fs.existsSync(claudeDest)) {
    const template = fs.readFileSync(
      path.join(TEMPLATES_DIR, 'CLAUDE.md.template'), 'utf8'
    );
    fs.writeFileSync(claudeDest, replaceTemplateVars(template, vars));
    console.log(' \x1b[32m✓\x1b[0m Created CLAUDE.md');
  } else {
    console.log(' \x1b[33m-\x1b[0m CLAUDE.md already exists, skipping');
  }

  // 4. Copy evolution rules
  const rulesSrc = path.join(TEMPLATES_DIR, 'rules', 'evolution-system.md');
  const rulesDest = path.join(projectDir, '.claude', 'rules', 'evolution-system.md');
  if (fs.existsSync(rulesDest)) {
    console.log(' \x1b[33m-\x1b[0m .claude/rules/evolution-system.md already exists, skipping');
  } else if (fs.existsSync(rulesSrc)) {
    fs.copyFileSync(rulesSrc, rulesDest);
    console.log(' \x1b[32m✓\x1b[0m Created .claude/rules/evolution-system.md');
  } else {
    // Only report success when a file was really written.
    console.log(' \x1b[33m!\x1b[0m Rules template not found, skipping .claude/rules/evolution-system.md');
  }

  // 5. Copy core scripts
  if (fs.existsSync(CORE_DIR)) {
    copyDir(CORE_DIR, path.join(projectDir, 'scripts'));
    console.log(' \x1b[32m✓\x1b[0m Copied evolution scripts to scripts/');
  }

  // 6. Copy slash commands
  if (fs.existsSync(COMMANDS_DIR)) {
    copyDir(COMMANDS_DIR, path.join(projectDir, '.claude', 'commands'));
    console.log(' \x1b[32m✓\x1b[0m Copied slash commands to .claude/commands/');
  }

  // 7. Configure Claude Code hooks (settings.json is created if missing)
  configureObservationHook(projectDir);

  // 8. Create .gitignore additions
  const gitignorePath = path.join(projectDir, '.gitignore');
  const gitignoreEntries = [
    '',
    '# Homunculus runtime data',
    'homunculus/observations.jsonl*',
    'data/hook-profile*',
    'data/auto-learn-cooldown.json'
  ].join('\n');

  if (fs.existsSync(gitignorePath)) {
    const existing = fs.readFileSync(gitignorePath, 'utf8');
    // The section header doubles as the "already added" marker.
    if (!existing.includes('Homunculus runtime')) {
      fs.appendFileSync(gitignorePath, '\n' + gitignoreEntries + '\n');
      console.log(' \x1b[32m✓\x1b[0m Updated .gitignore');
    }
  } else {
    fs.writeFileSync(gitignorePath, gitignoreEntries.trim() + '\n');
    console.log(' \x1b[32m✓\x1b[0m Created .gitignore');
  }

  console.log('');
  console.log(' \x1b[1m\x1b[32mDone!\x1b[0m Your assistant is ready to evolve.');
  console.log('');
  console.log(' Next steps:');
  console.log(' 1. Edit \x1b[1marchitecture.yaml\x1b[0m to define your goals');
  console.log(' 2. Use Claude Code normally — the system observes automatically');
  console.log(' 3. Run \x1b[1mclaude "/eval-skill"\x1b[0m to check evolution progress');
  console.log('');

  if (rl) rl.close();
}
|
|
313
|
+
|
|
314
|
+
main().catch(err => {
|
|
315
|
+
console.error('Error:', err.message);
|
|
316
|
+
process.exit(1);
|
|
317
|
+
});
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
disable-model-invocation: true
|
|
3
|
+
---
|
|
4
|
+
# /eval-skill — Evaluate an Evolved Skill
|
|
5
|
+
|
|
6
|
+
Run scenario-based tests on a skill to measure its quality.
|
|
7
|
+
|
|
8
|
+
## Steps
|
|
9
|
+
|
|
10
|
+
1. List eval specs: `ls homunculus/evolved/evals/*.eval.yaml 2>/dev/null`
|
|
11
|
+
2. If user specified a skill name, use that eval spec; otherwise let user choose
|
|
12
|
+
3. Read the skill file (`homunculus/evolved/skills/<name>.md`) and its eval spec
|
|
13
|
+
|
|
14
|
+
## Evaluation
|
|
15
|
+
|
|
16
|
+
For each scenario, act as a **developer who doesn't know the answer** — only reference the skill document. Then compare against expected_behavior and anti_patterns.
|
|
17
|
+
|
|
18
|
+
## Results
|
|
19
|
+
|
|
20
|
+
| Result | Condition |
|
|
21
|
+
|--------|-----------|
|
|
22
|
+
| **PASS** | Skill guides all expected behaviors, no anti-patterns triggered |
|
|
23
|
+
| **PARTIAL** | Skill guides some expected behaviors, or misses important details |
|
|
24
|
+
| **FAIL** | Skill fails to guide correct behavior, or would cause anti-patterns |
|
|
25
|
+
| **GAP** | Scenario knowledge is completely absent from skill |
|
|
26
|
+
|
|
27
|
+
## Report Format
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
🔬 Skill Eval: <name> v<version>
|
|
31
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
32
|
+
|
|
33
|
+
Scenario Result Notes
|
|
34
|
+
──────────────────────────────────────
|
|
35
|
+
<scenario.name> PASS -
|
|
36
|
+
<scenario.name> PARTIAL Missing X
|
|
37
|
+
<scenario.name> FAIL Would cause Y
|
|
38
|
+
|
|
39
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
40
|
+
Pass rate: X/Y (Z%)
|
|
41
|
+
Grade: ⭐⭐⭐⭐⭐ (>= 90)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## After Evaluation
|
|
45
|
+
|
|
46
|
+
- Update `last_eval` and `pass_rate` in the eval spec
|
|
47
|
+
- Append result to `homunculus/evolved/evals/history.jsonl`
|
|
48
|
+
- Suggest improvements for FAIL/PARTIAL/GAP scenarios
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
---
|
|
2
|
+
disable-model-invocation: true
|
|
3
|
+
---
|
|
4
|
+
# /evolve — Converge Instincts into Skills
|
|
5
|
+
|
|
6
|
+
Analyze existing instincts and find patterns that can be aggregated into higher-level skills.
|
|
7
|
+
|
|
8
|
+
## Modes
|
|
9
|
+
|
|
10
|
+
- Default → **Interactive mode** (manual confirmation)
|
|
11
|
+
- `--auto` → **Auto mode** (for nightly agent, no confirmation needed)
|
|
12
|
+
|
|
13
|
+
## Interactive Mode
|
|
14
|
+
|
|
15
|
+
### Steps
|
|
16
|
+
|
|
17
|
+
1. Read all instincts from `homunculus/instincts/personal/`
|
|
18
|
+
2. Group by trigger/topic similarity
|
|
19
|
+
3. Analyze groups:
|
|
20
|
+
- **2+ instincts with similar triggers** → Skill candidate
|
|
21
|
+
- **High confidence (≥0.7) workflow combos** → Command candidate
|
|
22
|
+
4. Present candidates for user confirmation
|
|
23
|
+
5. Generate to `homunculus/evolved/`
|
|
24
|
+
|
|
25
|
+
### Report
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
Evolution Analysis
|
|
29
|
+
━━━━━━━━━━━━━━━━━━
|
|
30
|
+
Instincts: N total
|
|
31
|
+
Groups: M
|
|
32
|
+
|
|
33
|
+
Skill candidates:
|
|
34
|
+
1. <name> (source: N instincts, avg confidence: 0.X)
|
|
35
|
+
2. ...
|
|
36
|
+
|
|
37
|
+
Select items to evolve (enter numbers) or 'all':
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Auto Mode (--auto)
|
|
41
|
+
|
|
42
|
+
For nightly agent. Runs the full pipeline: evolve → eval → improve.
|
|
43
|
+
|
|
44
|
+
1. **Evolve**: Auto-select candidates with avg confidence ≥ 0.7
|
|
45
|
+
2. **Eval**: Run scenario tests on all skills with eval specs
|
|
46
|
+
3. **Improve**: Auto-improve skills below 100% (max 3 rounds)
|
|
47
|
+
- Rollback if score regresses
|
|
48
|
+
|
|
49
|
+
## Skill Format
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
name: <skill name>
|
|
53
|
+
description: <description>
|
|
54
|
+
trigger: <unified trigger>
|
|
55
|
+
steps:
|
|
56
|
+
- <step 1>
|
|
57
|
+
- <step 2>
|
|
58
|
+
source_instincts:
|
|
59
|
+
- <source instinct filename>
|
|
60
|
+
confidence: <average confidence>
|
|
61
|
+
created: <ISO timestamp>
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Notes
|
|
65
|
+
- Source instincts are NOT deleted (preserved for history)
|
|
66
|
+
- Each evolution tracked via git
|
|
67
|
+
- Evolved skill confidence inherited from source instincts
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
disable-model-invocation: true
|
|
3
|
+
---
|
|
4
|
+
# /improve-skill — Auto-Improve an Evolved Skill
|
|
5
|
+
|
|
6
|
+
Iteratively improve a skill until its eval passes, using an eval → improve loop.
|
|
7
|
+
|
|
8
|
+
## Flow
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
┌─── Round 1 ──────────────────────────┐
|
|
12
|
+
│ 1. /eval-skill → baseline score │
|
|
13
|
+
│ 2. Analyze FAIL/PARTIAL/GAP │
|
|
14
|
+
│ 3. Modify skill file │
|
|
15
|
+
│ 4. Bump version +0.1 │
|
|
16
|
+
│ 5. Re-eval │
|
|
17
|
+
│ 6. Compare scores: │
|
|
18
|
+
│ ├─ Improved (≥5pp) → next round │
|
|
19
|
+
│ ├─ Noise (|Δ| < 5pp) → stop │
|
|
20
|
+
│ └─ Regressed (≤-5pp) → rollback │
|
|
21
|
+
└──────────────────────────────────────┘
|
|
22
|
+
↓ (max 5 rounds)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Steps
|
|
26
|
+
|
|
27
|
+
1. Verify target skill and eval spec exist
|
|
28
|
+
2. Run initial eval, record baseline score
|
|
29
|
+
3. **Improve loop** (max 5 rounds):
|
|
30
|
+
a. Analyze failing scenarios
|
|
31
|
+
b. Modify skill file:
|
|
32
|
+
- FAIL → fix incorrect info or add missing rules
|
|
33
|
+
- PARTIAL → add detail
|
|
34
|
+
- GAP → add new section
|
|
35
|
+
c. Increment version (1.1 → 1.2 → 1.3...)
|
|
36
|
+
d. Re-eval
|
|
37
|
+
e. Compare scores (apply noise tolerance: 5pp)
|
|
38
|
+
4. Output improvement report
|
|
39
|
+
|
|
40
|
+
## Regression Detection
|
|
41
|
+
|
|
42
|
+
If a previously passing scenario now fails:
|
|
43
|
+
- Mark as **REGRESSION**
|
|
44
|
+
- Must fix regression before continuing
|
|
45
|
+
- If unable to fix, rollback to previous version
|
|
46
|
+
|
|
47
|
+
## Notes
|
|
48
|
+
- Only modify the skill file, never the eval spec (tests stay fixed)
|
|
49
|
+
- All intermediate versions tracked via git
|
|
50
|
+
- Absolute score delta < 5pp = statistical noise, neither a real improvement nor a regression
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// evaluate-session.js — Extract instincts from session observations
|
|
3
|
+
// Part of the Homunculus evolution pipeline
|
|
4
|
+
//
|
|
5
|
+
// Flow:
|
|
6
|
+
// 1. Read observations.jsonl, analyze tool usage patterns
|
|
7
|
+
// 2. Determine if session is worth extracting (enough activity)
|
|
8
|
+
// 3. Use Claude CLI to extract behavioral patterns as instincts
|
|
9
|
+
// 4. Write instincts to homunculus/instincts/personal/
|
|
10
|
+
|
|
11
|
+
const fs = require('fs');
|
|
12
|
+
const path = require('path');
|
|
13
|
+
const { execSync } = require('child_process');
|
|
14
|
+
|
|
15
|
+
// Configuration — adapt these to your setup
|
|
16
|
+
const BASE_DIR = process.env.HOMUNCULUS_BASE || process.cwd();
const HOMUNCULUS_DIR = path.join(BASE_DIR, 'homunculus');
const INSTINCTS_DIR = path.join(HOMUNCULUS_DIR, 'instincts', 'personal');
const OBS_FILE = path.join(HOMUNCULUS_DIR, 'observations.jsonl');

// Read an integer from the environment. An unset, empty or non-numeric value
// yields `fallback`; a NaN threshold would make every `<` / `>=` comparison
// false and silently disable the limits below.
function readIntEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// Extraction thresholds (auto-tunable)
const CONFIG = {
  // Minimum number of observation lines before a session is analyzed.
  min_messages: readIntEnv('HOMUNCULUS_MIN_MESSAGES', 10),
  // Minimum uses of a single tool for it to count as a frequent pattern.
  min_tool_repeats: readIntEnv('HOMUNCULUS_MIN_TOOL_REPEATS', 5),
  // Maximum number of instincts extracted per day.
  daily_limit: readIntEnv('HOMUNCULUS_DAILY_LIMIT', 3)
};
|
|
27
|
+
|
|
28
|
+
/**
 * Create `dir` (and any missing parents) if it does not exist yet.
 *
 * `recursive: true` already makes mkdirSync a no-op for an existing
 * directory, so the separate existence check (and its check-then-create
 * race) is unnecessary.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
|
|
31
|
+
|
|
32
|
+
/**
 * Current date as `YYYY-MM-DD`, taken from the ISO-8601 (UTC) timestamp.
 *
 * @returns {string} Today's date in UTC.
 */
function getDateString() {
  const isoTimestamp = new Date().toISOString();
  const [datePart] = isoTimestamp.split('T');
  return datePart;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Write a prefixed diagnostic line to stderr.
 *
 * @param {...*} args - Message parts, joined with single spaces.
 */
function log(...args) {
  const message = args.join(' ');
  process.stderr.write('[evaluate-session] ' + message + '\n');
}
|
|
39
|
+
|
|
40
|
+
// Count today's extractions to respect daily limit
|
|
41
|
+
/**
 * Count the instinct files that mention today's date, used to enforce the
 * daily extraction limit.
 *
 * Each entry is read independently: a subdirectory or an unreadable file is
 * skipped instead of resetting the whole count to 0, which would let the
 * daily limit be bypassed.
 *
 * @returns {number} Number of instinct files containing today's date string;
 *   0 when the instincts directory cannot be listed.
 */
function getTodayExtractions() {
  const today = getDateString();

  let entries;
  try {
    entries = fs.readdirSync(INSTINCTS_DIR, { withFileTypes: true });
  } catch {
    // Directory missing or unreadable: nothing has been extracted yet.
    return 0;
  }

  let count = 0;
  for (const entry of entries) {
    if (entry.isDirectory()) continue;
    try {
      const content = fs.readFileSync(path.join(INSTINCTS_DIR, entry.name), 'utf8');
      if (content.includes(today)) count += 1;
    } catch (err) {
      log(`Could not read ${entry.name}: ${err.message}`);
    }
  }
  return count;
}
|
|
53
|
+
|
|
54
|
+
// Analyze observations to find patterns
|
|
55
|
+
/**
 * Read the observations log and summarize tool usage.
 *
 * Counting uses a Map so that tool names such as `constructor` or
 * `__proto__` cannot collide with inherited object properties. Lines that
 * are not valid JSON are ignored; entries without a string `tool` field are
 * counted under 'unknown'.
 *
 * @returns {{total_observations: number,
 *            frequent_tools: Array<[string, number]>,
 *            tool_counts: Object<string, number>}|null}
 *   The summary, or null when the log is missing, has fewer than
 *   CONFIG.min_messages lines, or no tool reaches CONFIG.min_tool_repeats.
 */
function analyzeObservations() {
  if (!fs.existsSync(OBS_FILE)) {
    log('No observations file found');
    return null;
  }

  const lines = fs.readFileSync(OBS_FILE, 'utf8').trim().split('\n').filter(Boolean);
  if (lines.length < CONFIG.min_messages) {
    log(`Only ${lines.length} observations (need ${CONFIG.min_messages}), skipping`);
    return null;
  }

  // Count tool usage
  const counts = new Map();
  for (const line of lines) {
    let obs;
    try {
      obs = JSON.parse(line);
    } catch {
      continue; // tolerate partially written or corrupt lines
    }
    const hasToolName = obs !== null && typeof obs === 'object' &&
      typeof obs.tool === 'string' && obs.tool !== '';
    const tool = hasToolName ? obs.tool : 'unknown';
    counts.set(tool, (counts.get(tool) || 0) + 1);
  }

  // Find high-frequency tools, most used first
  const frequentTools = [...counts.entries()]
    .filter(([, count]) => count >= CONFIG.min_tool_repeats)
    .sort((a, b) => b[1] - a[1]);

  if (frequentTools.length === 0) {
    log('No high-frequency tool patterns found, skipping');
    return null;
  }

  return {
    total_observations: lines.length,
    frequent_tools: frequentTools,
    // Object.fromEntries defines own properties, so unusual keys stay data.
    tool_counts: Object.fromEntries(counts)
  };
}
|
|
93
|
+
|
|
94
|
+
// Extract instinct using Claude CLI
|
|
95
|
+
/**
 * Ask the Claude CLI to turn the session's tool-usage summary into one
 * instinct and save it as a markdown file in INSTINCTS_DIR.
 *
 * The CLI is started without a shell and the prompt is passed as a single
 * argument. Tool names come from the observations log, so splicing them into
 * a shell command line would let backticks, `$(...)` or backslashes in that
 * data be interpreted by the shell.
 *
 * @param {{frequent_tools: Array<[string, number]>, total_observations: number}} analysis
 *   Summary produced by analyzeObservations().
 * @returns {string|null} The created file name, or null when the CLI fails,
 *   the reply has no usable `name:` field, or the instinct already exists.
 */
function extractInstinct(analysis) {
  // Local require keeps this function self-contained (no shell variant needed).
  const { execFileSync } = require('child_process');

  const toolSummary = analysis.frequent_tools
    .map(([tool, count]) => `${tool}: ${count} times`)
    .join(', ');

  const prompt = `Based on this session's tool usage patterns, extract ONE behavioral instinct that would be useful to remember for future sessions.

Tool usage: ${toolSummary}
Total observations: ${analysis.total_observations}

Write the instinct as a markdown file with this format:

---
name: descriptive-kebab-case-name
category: one of [coding, debugging, workflow, communication, tool-usage]
confidence: 0.7
extracted: "${getDateString()}"
source: "session observation"
---

## Pattern
[Describe the behavioral pattern in 1-2 sentences]

## When to Apply
[When should this pattern be used]

## Anti-Patterns
[What to avoid]

IMPORTANT: Only output the markdown content, nothing else.`;

  try {
    const result = execFileSync(
      'claude',
      ['-p', prompt, '--model', 'claude-sonnet-4-6', '--max-budget-usd', '0.50'],
      { encoding: 'utf8', timeout: 30000 }
    );

    // Extract name from frontmatter (first line that starts with "name:")
    const nameMatch = result.match(/^name:\s*(.+)$/m);
    if (!nameMatch) return null;

    // Lower-case before sanitizing so "My-Name" becomes "my-name" instead of
    // "-y--ame", then collapse and trim the dashes left by replaced characters.
    const name = nameMatch[1]
      .trim()
      .toLowerCase()
      .replace(/[^a-z0-9-]/g, '-')
      .replace(/-+/g, '-')
      .replace(/^-|-$/g, '');
    if (!name) {
      log('Extraction returned an unusable instinct name, skipping');
      return null;
    }

    const filename = `${name}.md`;
    const filepath = path.join(INSTINCTS_DIR, filename);

    if (fs.existsSync(filepath)) {
      log(`Instinct ${filename} already exists, skipping`);
      return null;
    }

    ensureDir(INSTINCTS_DIR);
    fs.writeFileSync(filepath, result.trim() + '\n');
    log(`Extracted instinct: ${filename}`);
    return filename;
  } catch (err) {
    log(`Extraction failed: ${err.message}`);
    return null;
  }
}
|
|
154
|
+
|
|
155
|
+
// Main
|
|
156
|
+
/**
 * Run one extraction pass: stop when today's limit is reached or the session
 * has nothing worth analyzing; otherwise extract an instinct and report it.
 */
function main() {
  // Respect the daily limit before doing any work.
  const extractedToday = getTodayExtractions();
  const limitReached = extractedToday >= CONFIG.daily_limit;
  if (limitReached) {
    log(`Daily limit reached (${extractedToday}/${CONFIG.daily_limit}), skipping`);
    return;
  }

  const analysis = analyzeObservations();
  if (!analysis) {
    return;
  }

  const instinctFile = extractInstinct(analysis);
  if (!instinctFile) {
    return;
  }
  log(`Successfully extracted: ${instinctFile}`);
}
|
|
172
|
+
|
|
173
|
+
main();
|