homunculus-code 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/CONTRIBUTING.md +56 -0
  2. package/LICENSE +21 -0
  3. package/README.md +443 -0
  4. package/bin/init.js +317 -0
  5. package/commands/eval-skill.md +48 -0
  6. package/commands/evolve.md +67 -0
  7. package/commands/improve-skill.md +50 -0
  8. package/core/evaluate-session.js +173 -0
  9. package/core/observe.sh +51 -0
  10. package/core/prune-instincts.js +159 -0
  11. package/docs/nightly-agent.md +130 -0
  12. package/examples/reference/README.md +47 -0
  13. package/examples/reference/architecture.yaml +886 -0
  14. package/examples/reference/evolved-agents/assistant-explorer.md +86 -0
  15. package/examples/reference/evolved-agents/shell-debugger.md +108 -0
  16. package/examples/reference/evolved-agents/tdd-runner.md +112 -0
  17. package/examples/reference/evolved-evals/api-system-diagnosis.eval.yaml +125 -0
  18. package/examples/reference/evolved-evals/assistant-system-management.eval.yaml +123 -0
  19. package/examples/reference/evolved-evals/claude-code-reference.eval.yaml +394 -0
  20. package/examples/reference/evolved-evals/development-verification-patterns.eval.yaml +117 -0
  21. package/examples/reference/evolved-evals/multi-agent-design-patterns.eval.yaml +151 -0
  22. package/examples/reference/evolved-evals/shell-automation-patterns.eval.yaml +209 -0
  23. package/examples/reference/evolved-evals/tdd-workflow.eval.yaml +191 -0
  24. package/examples/reference/evolved-evals/workflows.eval.yaml +148 -0
  25. package/examples/reference/evolved-skills/api-system-diagnosis.md +234 -0
  26. package/examples/reference/evolved-skills/assistant-system-management.md +199 -0
  27. package/examples/reference/evolved-skills/development-verification-patterns.md +243 -0
  28. package/examples/reference/evolved-skills/multi-agent-design-patterns.md +259 -0
  29. package/examples/reference/evolved-skills/shell-automation-patterns.md +347 -0
  30. package/examples/reference/evolved-skills/tdd-workflow.md +272 -0
  31. package/examples/reference/evolved-skills/workflows.md +237 -0
  32. package/package.json +25 -0
  33. package/templates/CLAUDE.md.template +36 -0
  34. package/templates/architecture.template.yaml +41 -0
  35. package/templates/rules/evolution-system.md +29 -0
@@ -0,0 +1,51 @@
1
#!/usr/bin/env bash
# observe.sh — Observe tool usage for evolution
# Usage: observe.sh <pre|post>
#   The hook payload (JSON with a "tool_name" field) is read from stdin.
# Called by Claude Code PostToolUse hook
#
# Appends one JSON line per observed tool call to
# $HOMUNCULUS_DIR/observations.jsonl, rotating the file once it exceeds MAX_SIZE.

set -euo pipefail

# Configuration — override HOMUNCULUS_DIR with an environment variable
HOMUNCULUS_DIR="${HOMUNCULUS_DIR:-$(pwd)/homunculus}"
OBS_FILE="${HOMUNCULUS_DIR}/observations.jsonl"
MAX_SIZE=$((10 * 1024 * 1024)) # 10MB

PHASE="${1:-unknown}"

# Read stdin (hook input JSON). Skipped when stdin is a terminal so a manual
# invocation does not block waiting for input.
INPUT=""
if [ ! -t 0 ]; then
  INPUT=$(cat)
fi

# Extract tool_name from stdin JSON.
# `|| true` keeps malformed JSON from aborting the hook: without it a jq
# failure would terminate the script because of `set -e` + `pipefail`.
TOOL_NAME="unknown"
if command -v jq &>/dev/null && [ -n "$INPUT" ]; then
  EXTRACTED=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || true)
  if [ -n "$EXTRACTED" ]; then
    TOOL_NAME="$EXTRACTED"
  fi
fi

# Only observe post-tool usage
if [ "$PHASE" != "post" ]; then
  exit 0
fi

# Rotate if too large
if [ -f "$OBS_FILE" ]; then
  # Try the BSD/macOS stat syntax first, then the GNU one; if neither works
  # report 0 so that rotation is simply skipped.
  CURRENT_SIZE=$(stat -f%z "$OBS_FILE" 2>/dev/null || stat -c%s "$OBS_FILE" 2>/dev/null || echo 0)
  if [ "$CURRENT_SIZE" -gt "$MAX_SIZE" ]; then
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    # If gzip fails, `set -e` stops the script before the log is truncated.
    gzip -c "$OBS_FILE" > "${OBS_FILE}.${TIMESTAMP}.gz"
    : > "$OBS_FILE"
  fi
fi

# Write observation
TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)
mkdir -p "$(dirname "$OBS_FILE")"

if command -v jq &>/dev/null; then
  # jq builds the line so that the tool name is always correctly JSON-escaped.
  jq -nc \
    --arg ts "$TIMESTAMP" \
    --arg phase "$PHASE" \
    --arg tool "$TOOL_NAME" \
    '{timestamp: $ts, phase: $phase, tool: $tool}' >> "$OBS_FILE"
else
  # Without jq the tool name is never extracted, so TOOL_NAME is the literal
  # "unknown" and PHASE is "post" here; no escaping is required.
  echo "{\"timestamp\":\"$TIMESTAMP\",\"phase\":\"$PHASE\",\"tool\":\"$TOOL_NAME\"}" >> "$OBS_FILE"
fi
@@ -0,0 +1,159 @@
1
+ #!/usr/bin/env node
2
+ // prune-instincts.js — Auto-archive low-value instincts
3
+ //
4
+ // Scoring dimensions:
5
+ // 1. Age (older = more likely outdated)
6
+ // 2. Confidence with time decay (half-life based)
7
+ // 3. Skill coverage (already covered by a skill = lower value)
8
+ //
9
+ // Usage:
10
+ // node prune-instincts.js # Dry run — list archive candidates
11
+ // node prune-instincts.js --apply # Execute archival
12
+ // node prune-instincts.js --threshold 40 # Custom score threshold (default: 75)
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+
17
// Configuration — override with environment variables
// HOMUNCULUS_BASE selects the project root; it defaults to the current directory.
const BASE_DIR = process.env.HOMUNCULUS_BASE || process.cwd();
const PERSONAL_DIR = path.join(BASE_DIR, 'homunculus', 'instincts', 'personal');
const ARCHIVED_DIR = path.join(BASE_DIR, 'homunculus', 'instincts', 'archived');
const SKILLS_DIR = path.join(BASE_DIR, 'homunculus', 'evolved', 'skills');

// Command-line flags: --apply and --threshold <number>
const args = process.argv.slice(2);
const applyMode = args.includes('--apply');
const thresholdIdx = args.indexOf('--threshold');
const parsedThreshold = Number.parseInt(args[thresholdIdx + 1], 10);
if (thresholdIdx >= 0 && Number.isNaN(parsedThreshold)) {
  // A NaN threshold makes every `score < threshold` comparison false, so the
  // tool would report "no candidates" no matter what. Fail loudly instead.
  console.error('Error: --threshold requires a numeric value, e.g. --threshold 40');
  process.exit(1);
}
// Instincts scoring below this value become archive candidates.
const ARCHIVE_THRESHOLD = thresholdIdx >= 0 ? parsedThreshold : 75;
// Above this many active instincts, every instinct gets a capacity penalty.
const CAPACITY_SOFT_LIMIT = 80;

// Confidence decay: half-life in days
const CONFIDENCE_HALF_LIFE_DAYS = 90;
// Exponential decay rate chosen so that exp(-lambda * half-life) === 0.5.
const DECAY_LAMBDA = Math.LN2 / CONFIDENCE_HALF_LIFE_DAYS;
32
+
33
/**
 * Read a UTF-8 text file, returning an empty string when it cannot be read.
 *
 * Best-effort by design: callers treat '' as "no content". A missing file
 * stays silent, but any other failure (permissions, path is a directory,
 * I/O error) is reported on stderr so it is not mistaken for an empty file.
 *
 * @param {string} fp - Path of the file to read.
 * @returns {string} The file contents, or '' if the read failed.
 */
function safeRead(fp) {
  try {
    return fs.readFileSync(fp, 'utf8');
  } catch (err) {
    if (!err || err.code !== 'ENOENT') {
      const detail = err && err.message ? err.message : String(err);
      console.error(`Warning: could not read ${fp}: ${detail}`);
    }
    return '';
  }
}
36
+
37
/**
 * Parse an instinct markdown file into the fields used for scoring.
 *
 * Fields are located with case-insensitive regexes over the raw text:
 *   - `confidence:`                       numeric, defaults to 0.5
 *   - `extracted:` / `created:` / `date:` creation date
 *   - `updated:` / `last_reinforced:`     last update, falls back to creation date
 *
 * Dates that are missing or cannot be parsed are treated as unknown, and the
 * instinct is then assumed to be 180 days old.
 *
 * @param {string} filepath - Path to the instinct `.md` file.
 * @returns {{name: string, confidence: number, effectiveConfidence: number,
 *   created: (Date|null), updated: (Date|null), daysSinceUpdate: number,
 *   content: string}} Parsed instinct; `effectiveConfidence` is `confidence`
 *   after exponential time decay.
 */
function parseInstinct(filepath) {
  const content = safeRead(filepath);
  const name = path.basename(filepath, '.md');

  // A capture such as "." matches the pattern but is not a number; use the default then.
  const rawConfidence = (content.match(/confidence:\s*([\d.]+)/im) || [])[1] || '0.5';
  const parsedConfidence = Number.parseFloat(rawConfidence);
  const confidence = Number.isNaN(parsedConfidence) ? 0.5 : parsedConfidence;

  // Turn a regex match into a Date, or null when the text is not a valid date.
  // Without this check an Invalid Date would make every derived number NaN.
  const toDate = (match) => {
    if (!match) return null;
    const parsed = new Date(match[1].trim());
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  };

  const created = toDate(content.match(/(?:extracted|created|date):\s*"?([^"\n]+)"?/im));
  const updated =
    toDate(content.match(/(?:updated|last_reinforced):\s*"?([^"\n]+)"?/im)) || created;

  // Confidence decay. A date in the future is clamped to an age of 0 so that
  // decay can only lower the confidence, never raise it.
  const daysSinceUpdate = updated
    ? Math.max(0, (Date.now() - updated.getTime()) / 86400000)
    : 180;
  const effectiveConfidence = confidence * Math.exp(-DECAY_LAMBDA * daysSinceUpdate);

  return { name, confidence, effectiveConfidence, created, updated, daysSinceUpdate, content };
}
53
+
54
/**
 * Collect the names of instincts that evolved skills declare as their source.
 *
 * Every `.md` file in SKILLS_DIR is scanned for lines of the form
 * `source_instinct: a, b` or `source_instincts: [a, "b"]`. Names may be
 * separated by commas and/or whitespace. Surrounding list brackets and quotes
 * are stripped so the names can be compared directly with instinct file names.
 *
 * @returns {Set<string>} Instinct names referenced by at least one skill;
 *   empty when SKILLS_DIR does not exist.
 */
function getSkillCoverage() {
  const coverage = new Set();
  if (!fs.existsSync(SKILLS_DIR)) return coverage;

  const skillFiles = fs.readdirSync(SKILLS_DIR).filter((f) => f.endsWith('.md'));
  for (const file of skillFiles) {
    const content = safeRead(path.join(SKILLS_DIR, file));
    // Skills often reference instinct names they were derived from
    const declarations = content.match(/source_instincts?:(.+)/gim) || [];
    for (const declaration of declarations) {
      const names = declaration
        .replace(/source_instincts?:/i, '')
        .split(/[,\s]+/)
        // Drop YAML list/quote punctuation: `[foo` -> `foo`, `"bar"]` -> `bar`.
        .map((token) => token.replace(/^[\["'`]+|[\]"'`]+$/g, ''))
        // Splitting leaves empty tokens at the edges; they are not names.
        .filter((token) => token.length > 0);
      for (const instinctName of names) {
        coverage.add(instinctName);
      }
    }
  }
  return coverage;
}
71
+
72
/**
 * Score an instinct from 0 to 100; lower scores are stronger archive candidates.
 *
 * Starts at 100 and subtracts penalties:
 *   - effective confidence: < 0.5 → -30, < 0.7 → -15
 *   - days since update:    > 60 → -20, > 30 → -10
 *   - covered by a skill:   -25
 *   - capacity pressure:    -1 per instinct over CAPACITY_SOFT_LIMIT, at most -10
 *
 * A confidence or age that is not a finite number (e.g. NaN from an
 * unparseable date) gets the largest penalty of its category. Otherwise every
 * comparison against NaN would be false and unparseable metadata would earn
 * a perfect score.
 *
 * @param {{name: string, effectiveConfidence: number, daysSinceUpdate: number}} instinct
 *   Parsed instinct to score.
 * @param {Set<string>} skillCoverage - Instinct names already covered by a skill.
 * @param {number} totalCount - Number of active instincts.
 * @returns {{score: number, reasons: string[]}} Final score (never below 0) and
 *   one human-readable reason per penalty applied.
 */
function scoreInstinct(instinct, skillCoverage, totalCount) {
  let score = 100;
  const reasons = [];

  // Effective confidence (with decay)
  const confidence = instinct.effectiveConfidence;
  if (!Number.isFinite(confidence)) {
    score -= 30;
    reasons.push('unknown effective confidence');
  } else if (confidence < 0.5) {
    score -= 30;
    reasons.push(`low effective confidence: ${confidence.toFixed(2)}`);
  } else if (confidence < 0.7) {
    score -= 15;
    reasons.push(`medium confidence: ${confidence.toFixed(2)}`);
  }

  // Age penalty
  const ageDays = instinct.daysSinceUpdate;
  if (!Number.isFinite(ageDays)) {
    score -= 20;
    reasons.push('unknown age');
  } else if (ageDays > 60) {
    score -= 20;
    reasons.push(`old: ${Math.floor(ageDays)} days`);
  } else if (ageDays > 30) {
    score -= 10;
    reasons.push(`aging: ${Math.floor(ageDays)} days`);
  }

  // Skill coverage
  if (skillCoverage.has(instinct.name)) {
    score -= 25;
    reasons.push('covered by skill');
  }

  // Capacity pressure
  if (totalCount > CAPACITY_SOFT_LIMIT) {
    score -= Math.min(10, totalCount - CAPACITY_SOFT_LIMIT);
    reasons.push(`capacity pressure: ${totalCount}/${CAPACITY_SOFT_LIMIT}`);
  }

  return { score: Math.max(0, score), reasons };
}
108
+
109
/**
 * Entry point: score every personal instinct, list those below
 * ARCHIVE_THRESHOLD and, in --apply mode, move them to the archive directory.
 *
 * Archiving never overwrites an instinct that was archived earlier under the
 * same name: the new file gets a timestamp suffix instead. A file that cannot
 * be moved is reported and skipped so the remaining candidates are still
 * processed; the process exit code is then set to 1.
 */
function main() {
  if (!fs.existsSync(PERSONAL_DIR)) {
    console.log('No instincts directory found. Nothing to prune.');
    return;
  }

  const files = fs.readdirSync(PERSONAL_DIR).filter((f) => f.endsWith('.md'));
  const skillCoverage = getSkillCoverage();
  const candidates = [];

  for (const file of files) {
    const instinct = parseInstinct(path.join(PERSONAL_DIR, file));
    const { score, reasons } = scoreInstinct(instinct, skillCoverage, files.length);

    if (score < ARCHIVE_THRESHOLD) {
      candidates.push({ ...instinct, score, reasons });
    }
  }

  // Lowest score first: the strongest archive candidates lead the report.
  candidates.sort((a, b) => a.score - b.score);

  console.log(`\nInstincts: ${files.length} active | Threshold: ${ARCHIVE_THRESHOLD} | Candidates: ${candidates.length}`);
  console.log('─'.repeat(60));

  if (candidates.length === 0) {
    console.log('No archive candidates found.');
    return;
  }

  for (const c of candidates) {
    console.log(` ${c.score.toString().padStart(3)} | ${c.name}`);
    console.log(` ${c.reasons.join(', ')}`);
  }

  if (!applyMode) {
    console.log(`\nDry run. Use --apply to archive these ${candidates.length} instincts.`);
    return;
  }

  console.log('\nArchiving...');
  fs.mkdirSync(ARCHIVED_DIR, { recursive: true });

  let archivedCount = 0;
  for (const c of candidates) {
    const src = path.join(PERSONAL_DIR, `${c.name}.md`);
    let dest = path.join(ARCHIVED_DIR, `${c.name}.md`);
    // renameSync would replace an existing destination; keep the earlier
    // archive by giving the new file a unique, timestamped name.
    if (fs.existsSync(dest)) {
      dest = path.join(ARCHIVED_DIR, `${c.name}.${Date.now()}.md`);
    }

    try {
      fs.renameSync(src, dest);
      archivedCount += 1;
      console.log(` Archived: ${c.name}`);
    } catch (err) {
      process.exitCode = 1;
      console.error(` Failed to archive ${c.name}: ${err.message}`);
    }
  }
  console.log(`\nDone. Archived ${archivedCount} instincts.`);
}
158
+
159
+ main();
@@ -0,0 +1,130 @@
1
+ # Nightly Agent Setup
2
+
3
+ The nightly agent is what makes Homunculus truly autonomous. It runs a heartbeat loop while you sleep — checking goal health, evolving skills, researching improvements, and running experiments.
4
+
5
+ ## Prerequisites
6
+
7
+ - Homunculus initialized in your project (`npx homunculus init`)
8
+ - Claude Code CLI installed (`~/.local/bin/claude`)
9
+ - macOS (launchd) or Linux (cron)
10
+
11
+ ## How It Works
12
+
13
+ The nightly agent is a shell script that runs on a schedule. Each "tick" of the heartbeat:
14
+
15
+ 1. **Health Check** — Scans `architecture.yaml`, runs each goal's `health_check.command`
16
+ 2. **Evolve** — Runs `/evolve --auto` to converge instincts → skills → eval → improve
17
+ 3. **Research** — Uses Claude to scan for better implementations of unhealthy goals
18
+ 4. **Experiment** — Generates hypotheses, runs experiments in git worktrees
19
+ 5. **Report** — Produces a morning report summarizing all changes
20
+
21
+ ## Setup (macOS — launchd)
22
+
23
+ ### 1. Create the heartbeat script
24
+
25
+ ```bash
26
+ #!/usr/bin/env bash
27
+ # heartbeat.sh — Nightly evolution agent
28
+ set -euo pipefail
29
+
30
+ cd /path/to/your/project
31
+ LOG="heartbeat-$(date +%Y%m%d).log"
32
+
33
+ echo "[$(date)] Starting nightly evolution..." >> "$LOG"
34
+
35
+ # Unset CLAUDECODE to avoid nested session conflicts
36
+ unset CLAUDECODE
37
+
38
+ # Run evolution pipeline
39
+ claude -p "Run /evolve --auto. Then check goal health in architecture.yaml. \
40
+ Research any unhealthy goals. Generate a morning report." \
41
+ --model claude-sonnet-4-6 \
42
+ --max-budget-usd 2.00 \
43
+ --no-session-persistence \
44
+ >> "$LOG" 2>&1
45
+
46
+ echo "[$(date)] Nightly evolution complete." >> "$LOG"
47
+ ```
48
+
49
+ ### 2. Create the launchd plist
50
+
51
+ Save to `~/Library/LaunchAgents/com.homunculus.heartbeat.plist`:
52
+
53
+ ```xml
54
+ <?xml version="1.0" encoding="UTF-8"?>
55
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
56
+ "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
57
+ <plist version="1.0">
58
+ <dict>
59
+ <key>Label</key>
60
+ <string>com.homunculus.heartbeat</string>
61
+ <key>ProgramArguments</key>
62
+ <array>
63
+ <string>/path/to/your/project/heartbeat.sh</string>
64
+ </array>
65
+ <key>StartCalendarInterval</key>
66
+ <dict>
67
+ <key>Hour</key>
68
+ <integer>2</integer>
69
+ <key>Minute</key>
70
+ <integer>0</integer>
71
+ </dict>
72
+ <key>EnvironmentVariables</key>
73
+ <dict>
74
+ <key>PATH</key>
75
+ <string>/usr/local/bin:/usr/bin:/bin:~/.local/bin:/opt/homebrew/bin</string>
76
+ </dict>
77
+ <key>StandardOutPath</key>
78
+ <string>/tmp/homunculus-heartbeat.log</string>
79
+ <key>StandardErrorPath</key>
80
+ <string>/tmp/homunculus-heartbeat.log</string>
81
+ </dict>
82
+ </plist>
83
+ ```
84
+
85
+ ### 3. Load the agent
86
+
87
+ ```bash
88
+ launchctl load ~/Library/LaunchAgents/com.homunculus.heartbeat.plist
89
+ ```
90
+
91
+ ## Setup (Linux — cron)
92
+
93
+ ```bash
94
+ # Run at 2 AM every night
95
+ 0 2 * * * cd /path/to/your/project && bash heartbeat.sh
96
+ ```
97
+
98
+ > Note: this applies only if you run cron on macOS — cron jobs there cannot access the macOS Keychain. If your Claude CLI authenticates via OAuth, use launchd instead.
99
+
100
+ ## Budget Control
101
+
102
+ The `--max-budget-usd` flag controls how much the nightly agent can spend per run. Start with `$2.00` and adjust based on your needs.
103
+
104
+ ## Morning Report
105
+
106
+ After a successful run, the agent produces a report. You can configure it to:
107
+ - Write to a file (`heartbeat/data/morning-report.md`)
108
+ - Send to Discord via webhook
109
+ - Push a desktop notification via `osascript`
110
+
111
+ ## Monitoring
112
+
113
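+ Check if the agent ran. The heartbeat script appends its own output to `heartbeat-YYYYMMDD.log` in the project directory; `/tmp/homunculus-heartbeat.log` only receives what launchd captures from the script's stdout/stderr: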
114
+ ```bash
115
+ # Last run time
116
+ ls -la /tmp/homunculus-heartbeat.log
117
+
118
+ # Recent output
119
+ tail -50 /tmp/homunculus-heartbeat.log
120
+ ```
121
+
122
+ ## Advanced: Multi-Tick Heartbeat
123
+
124
+ The reference implementation uses a more sophisticated heartbeat with:
125
+ - **Priority-based task scheduling** (P0-P4)
126
+ - **Budget tracking** across ticks
127
+ - **Experiment queue** management
128
+ - **Cross-tick progress** for long-running tasks
129
+
130
+ See `examples/reference/` for the full implementation.
@@ -0,0 +1,47 @@
1
+ # Reference Implementation
2
+
3
+ This is a snapshot of a real Homunculus system after **15 days of evolution** (1,235 commits).
4
+
5
+ ## What's Here
6
+
7
+ ```
8
+ reference/
9
+ ├── architecture.yaml # Real goal tree (9 goals, 46+ sub-goals)
10
+ ├── evolved-skills/ # 7 evolved skills (all 100% eval pass)
11
+ │ ├── api-system-diagnosis.md
12
+ │ ├── assistant-system-management.md
13
+ │ ├── claude-code-reference.md (not shipped in this package; only its eval spec is included)
14
+ │ ├── development-verification-patterns.md
15
+ │ ├── multi-agent-design-patterns.md
16
+ │ ├── shell-automation-patterns.md
17
+ │ ├── tdd-workflow.md
18
+ │ └── workflows.md
19
+ ├── evolved-agents/ # 3 specialized subagents
20
+ │ ├── assistant-explorer.md (Haiku — fast, read-only exploration)
21
+ │ ├── shell-debugger.md (Sonnet — shell script diagnosis)
22
+ │ └── tdd-runner.md (Sonnet — TDD red-green cycles)
23
+ └── evolved-evals/ # 8 eval specs (93 total scenarios)
24
+ ```
25
+
26
+ ## Key Numbers
27
+
28
+ | Metric | Value |
29
+ |--------|-------|
30
+ | System age | 15 days |
31
+ | Total instincts generated | 168 (84 active + 84 auto-archived) |
32
+ | Evolved skills | 7, all 100% eval pass |
33
+ | Eval scenarios | 93 total |
34
+ | Evolved agents | 3 |
35
+ | Goal tree | 9 root goals, 46+ sub-goals |
36
+ | Nightly agent commits | 134 across 11 nights |
37
+
38
+ ## How to Use This
39
+
40
+ Browse these files to understand what a mature Homunculus system looks like. Key things to notice:
41
+
42
+ 1. **architecture.yaml** — See how goals cascade into sub-goals, each with `purpose`, `metrics`, `health_check`, and `realized_by`
43
+ 2. **Evolved skills** — See how instincts converge into tested knowledge modules
44
+ 3. **Eval specs** — See how scenarios test skills with expected behaviors and anti-patterns
45
+ 4. **Agents** — See how specialized subagents are defined with model choice and tool restrictions
46
+
47
+ Your system will evolve differently based on your goals and usage patterns. This is just one possible outcome.