npm - homunculus-code - Versions diffs - 0.1.0 - Mend

homunculus-code 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/CONTRIBUTING.md +56 -0
package/LICENSE +21 -0
package/README.md +443 -0
package/bin/init.js +317 -0
package/commands/eval-skill.md +48 -0
package/commands/evolve.md +67 -0
package/commands/improve-skill.md +50 -0
package/core/evaluate-session.js +173 -0
package/core/observe.sh +51 -0
package/core/prune-instincts.js +159 -0
package/docs/nightly-agent.md +130 -0
package/examples/reference/README.md +47 -0
package/examples/reference/architecture.yaml +886 -0
package/examples/reference/evolved-agents/assistant-explorer.md +86 -0
package/examples/reference/evolved-agents/shell-debugger.md +108 -0
package/examples/reference/evolved-agents/tdd-runner.md +112 -0
package/examples/reference/evolved-evals/api-system-diagnosis.eval.yaml +125 -0
package/examples/reference/evolved-evals/assistant-system-management.eval.yaml +123 -0
package/examples/reference/evolved-evals/claude-code-reference.eval.yaml +394 -0
package/examples/reference/evolved-evals/development-verification-patterns.eval.yaml +117 -0
package/examples/reference/evolved-evals/multi-agent-design-patterns.eval.yaml +151 -0
package/examples/reference/evolved-evals/shell-automation-patterns.eval.yaml +209 -0
package/examples/reference/evolved-evals/tdd-workflow.eval.yaml +191 -0
package/examples/reference/evolved-evals/workflows.eval.yaml +148 -0
package/examples/reference/evolved-skills/api-system-diagnosis.md +234 -0
package/examples/reference/evolved-skills/assistant-system-management.md +199 -0
package/examples/reference/evolved-skills/development-verification-patterns.md +243 -0
package/examples/reference/evolved-skills/multi-agent-design-patterns.md +259 -0
package/examples/reference/evolved-skills/shell-automation-patterns.md +347 -0
package/examples/reference/evolved-skills/tdd-workflow.md +272 -0
package/examples/reference/evolved-skills/workflows.md +237 -0
package/package.json +25 -0
package/templates/CLAUDE.md.template +36 -0
package/templates/architecture.template.yaml +41 -0
package/templates/rules/evolution-system.md +29 -0

package/core/observe.sh ADDED Viewed

@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# observe.sh — Observe tool usage for evolution
+# Usage: observe.sh <pre|post>   (hook input JSON is read from stdin)
+# Called by Claude Code PostToolUse hook
+#
+# For a "post" invocation, appends one JSON line {timestamp, phase, tool} to
+# observations.jsonl. Any other phase exits 0 without writing anything.
+set -euo pipefail
+# Configuration — override these with environment variables
+HOMUNCULUS_DIR="${HOMUNCULUS_DIR:-$(pwd)/homunculus}"
+OBS_FILE="${HOMUNCULUS_DIR}/observations.jsonl"
+MAX_SIZE=$((10 * 1024 * 1024))  # 10MB
+PHASE="${1:-unknown}"
+# Read stdin (hook input JSON) — only when stdin is not a terminal
+INPUT=""
+if [ ! -t 0 ]; then
+  INPUT=$(cat)
+fi
+# Extract tool_name from stdin JSON; stays "unknown" without jq or without a usable value
+TOOL_NAME="unknown"
+if command -v jq &>/dev/null && [ -n "$INPUT" ]; then
+  # "|| true": with `set -e -o pipefail`, a jq parse error on malformed input would
+  # otherwise fail this assignment and abort the whole hook with a non-zero exit.
+  EXTRACTED=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || true)
+  if [ -n "$EXTRACTED" ]; then
+    TOOL_NAME="$EXTRACTED"
+  fi
+fi
+# Only observe post-tool usage
+if [ "$PHASE" != "post" ]; then
+  exit 0
+fi
+# Rotate if too large: compress the current log to a timestamped .gz, then truncate it.
+# Size lookup tries the BSD/macOS form (stat -f%z) first, then the GNU form (stat -c%s),
+# and falls back to 0 (no rotation) if both fail.
+if [ -f "$OBS_FILE" ]; then
+  OBS_SIZE=$(stat -f%z "$OBS_FILE" 2>/dev/null || stat -c%s "$OBS_FILE" 2>/dev/null || echo 0)
+  if [ "$OBS_SIZE" -gt "$MAX_SIZE" ]; then
+    ROTATE_STAMP=$(date +%Y%m%d%H%M%S)
+    gzip -c "$OBS_FILE" > "${OBS_FILE}.${ROTATE_STAMP}.gz"
+    : > "$OBS_FILE"
+  fi
+fi
+# Write observation (UTC, ISO-8601)
+TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+mkdir -p "$(dirname "$OBS_FILE")"
+if command -v jq &>/dev/null; then
+  # jq builds the object so the values are JSON-escaped
+  jq -nc \
+    --arg ts "$TIMESTAMP" \
+    --arg phase "$PHASE" \
+    --arg tool "$TOOL_NAME" \
+    '{timestamp: $ts, phase: $phase, tool: $tool}' >> "$OBS_FILE"
+else
+  # Without jq, TOOL_NAME is always "unknown" and PHASE is always "post" here,
+  # so the hand-built JSON needs no escaping.
+  echo "{\"timestamp\":\"$TIMESTAMP\",\"phase\":\"$PHASE\",\"tool\":\"$TOOL_NAME\"}" >> "$OBS_FILE"
+fi

package/core/prune-instincts.js ADDED Viewed

@@ -0,0 +1,159 @@
+#!/usr/bin/env node
+// prune-instincts.js — Auto-archive low-value instincts
+//
+// Scoring dimensions:
+// 1. Age (older = more likely outdated)
+// 2. Confidence with time decay (half-life based)
+// 3. Skill coverage (already covered by a skill = lower value)
+//
+// Usage:
+//   node prune-instincts.js              # Dry run — list archive candidates
+//   node prune-instincts.js --apply      # Execute archival
+//   node prune-instincts.js --threshold 40  # Custom score threshold (default: 75)
+const fs = require('fs');
+const path = require('path');
+// Configuration — override with environment variables
+const BASE_DIR = process.env.HOMUNCULUS_BASE || process.cwd();
+const PERSONAL_DIR = path.join(BASE_DIR, 'homunculus', 'instincts', 'personal');
+const ARCHIVED_DIR = path.join(BASE_DIR, 'homunculus', 'instincts', 'archived');
+const SKILLS_DIR = path.join(BASE_DIR, 'homunculus', 'evolved', 'skills');
+const args = process.argv.slice(2);
+const applyMode = args.includes('--apply');
+const thresholdIdx = args.indexOf('--threshold');
+const ARCHIVE_THRESHOLD = thresholdIdx >= 0 ? parseInt(args[thresholdIdx + 1], 10) : 75;
+const CAPACITY_SOFT_LIMIT = 80;
+// Confidence decay: half-life in days
+const CONFIDENCE_HALF_LIFE_DAYS = 90;
+const DECAY_LAMBDA = Math.LN2 / CONFIDENCE_HALF_LIFE_DAYS;
+function safeRead(fp) {
+  try { return fs.readFileSync(fp, 'utf8'); } catch { return ''; }
+}
+function parseInstinct(filepath) {
+  const content = safeRead(filepath);
+  const name = path.basename(filepath, '.md');
+  const confidence = parseFloat((content.match(/confidence:\s*([\d.]+)/im) || [])[1] || '0.5');
+  const createdMatch = content.match(/(?:extracted|created|date):\s*"?([^"\n]+)"?/im);
+  const created = createdMatch ? new Date(createdMatch[1]) : null;
+  const updatedMatch = content.match(/(?:updated|last_reinforced):\s*"?([^"\n]+)"?/im);
+  const updated = updatedMatch ? new Date(updatedMatch[1]) : created;
+  // Confidence decay
+  const daysSinceUpdate = updated ? (Date.now() - updated.getTime()) / 86400000 : 180;
+  const effectiveConfidence = confidence * Math.exp(-DECAY_LAMBDA * daysSinceUpdate);
+  return { name, confidence, effectiveConfidence, created, updated, daysSinceUpdate, content };
+}
+function getSkillCoverage() {
+  const coverage = new Set();
+  if (!fs.existsSync(SKILLS_DIR)) return coverage;
+  for (const file of fs.readdirSync(SKILLS_DIR).filter(f => f.endsWith('.md'))) {
+    const content = safeRead(path.join(SKILLS_DIR, file));
+    // Skills often reference instinct names they were derived from
+    const sourceMatch = content.match(/source_instincts?:(.+)/gim);
+    if (sourceMatch) {
+      for (const match of sourceMatch) {
+        const names = match.replace(/source_instincts?:/i, '').split(/[,\s]+/);
+        names.forEach(n => coverage.add(n.trim()));
+      }
+    }
+  }
+  return coverage;
+}
+function scoreInstinct(instinct, skillCoverage, totalCount) {
+  let score = 100;
+  const reasons = [];
+  // Effective confidence (with decay)
+  if (instinct.effectiveConfidence < 0.5) {
+    score -= 30;
+    reasons.push(`low effective confidence: ${instinct.effectiveConfidence.toFixed(2)}`);
+  } else if (instinct.effectiveConfidence < 0.7) {
+    score -= 15;
+    reasons.push(`medium confidence: ${instinct.effectiveConfidence.toFixed(2)}`);
+  }
+  // Age penalty
+  if (instinct.daysSinceUpdate > 60) {
+    score -= 20;
+    reasons.push(`old: ${Math.floor(instinct.daysSinceUpdate)} days`);
+  } else if (instinct.daysSinceUpdate > 30) {
+    score -= 10;
+    reasons.push(`aging: ${Math.floor(instinct.daysSinceUpdate)} days`);
+  }
+  // Skill coverage
+  if (skillCoverage.has(instinct.name)) {
+    score -= 25;
+    reasons.push('covered by skill');
+  }
+  // Capacity pressure
+  if (totalCount > CAPACITY_SOFT_LIMIT) {
+    score -= Math.min(10, totalCount - CAPACITY_SOFT_LIMIT);
+    reasons.push(`capacity pressure: ${totalCount}/${CAPACITY_SOFT_LIMIT}`);
+  }
+  return { score: Math.max(0, score), reasons };
+}
+function main() {
+  if (!fs.existsSync(PERSONAL_DIR)) {
+    console.log('No instincts directory found. Nothing to prune.');
+    return;
+  }
+  const files = fs.readdirSync(PERSONAL_DIR).filter(f => f.endsWith('.md'));
+  const skillCoverage = getSkillCoverage();
+  const candidates = [];
+  for (const file of files) {
+    const instinct = parseInstinct(path.join(PERSONAL_DIR, file));
+    const { score, reasons } = scoreInstinct(instinct, skillCoverage, files.length);
+    if (score < ARCHIVE_THRESHOLD) {
+      candidates.push({ ...instinct, score, reasons });
+    }
+  }
+  candidates.sort((a, b) => a.score - b.score);
+  console.log(`\nInstincts: ${files.length} active | Threshold: ${ARCHIVE_THRESHOLD} | Candidates: ${candidates.length}`);
+  console.log('─'.repeat(60));
+  if (candidates.length === 0) {
+    console.log('No archive candidates found.');
+    return;
+  }
+  for (const c of candidates) {
+    console.log(`  ${c.score.toString().padStart(3)} | ${c.name}`);
+    console.log(`      ${c.reasons.join(', ')}`);
+  }
+  if (applyMode) {
+    console.log('\nArchiving...');
+    if (!fs.existsSync(ARCHIVED_DIR)) fs.mkdirSync(ARCHIVED_DIR, { recursive: true });
+    for (const c of candidates) {
+      const src = path.join(PERSONAL_DIR, `${c.name}.md`);
+      const dest = path.join(ARCHIVED_DIR, `${c.name}.md`);
+      fs.renameSync(src, dest);
+      console.log(`  Archived: ${c.name}`);
+    }
+    console.log(`\nDone. Archived ${candidates.length} instincts.`);
+  } else {
+    console.log(`\nDry run. Use --apply to archive these ${candidates.length} instincts.`);
+  }
+}
+main();

package/docs/nightly-agent.md ADDED Viewed

@@ -0,0 +1,130 @@
+# Nightly Agent Setup
+The nightly agent is what makes Homunculus truly autonomous. It runs a heartbeat loop while you sleep — checking goal health, evolving skills, researching improvements, and running experiments.
+## Prerequisites
+- Homunculus initialized in your project (`npx homunculus init`)
+- Claude Code CLI installed (`~/.local/bin/claude`)
+- macOS (launchd) or Linux (cron)
+## How It Works
+The nightly agent is a shell script that runs on a schedule. Each "tick" of the heartbeat:
+1. **Health Check** — Scans `architecture.yaml`, runs each goal's `health_check.command`
+2. **Evolve** — Runs `/evolve --auto` to converge instincts → skills → eval → improve
+3. **Research** — Uses Claude to scan for better implementations of unhealthy goals
+4. **Experiment** — Generates hypotheses, runs experiments in git worktrees
+5. **Report** — Produces a morning report summarizing all changes
+## Setup (macOS — launchd)
+### 1. Create the heartbeat script
+```bash
+#!/usr/bin/env bash
+# heartbeat.sh — Nightly evolution agent
+set -euo pipefail
+cd /path/to/your/project
+LOG="heartbeat-$(date +%Y%m%d).log"
+echo "[$(date)] Starting nightly evolution..." >> "$LOG"
+# Unset CLAUDECODE to avoid nested session conflicts
+unset CLAUDECODE
+# Run evolution pipeline
+claude -p "Run /evolve --auto. Then check goal health in architecture.yaml. \
+Research any unhealthy goals. Generate a morning report." \
+  --model claude-sonnet-4-6 \
+  --max-budget-usd 2.00 \
+  --no-session-persistence \
+  >> "$LOG" 2>&1
+echo "[$(date)] Nightly evolution complete." >> "$LOG"
+```
+### 2. Create the launchd plist
+Save to `~/Library/LaunchAgents/com.homunculus.heartbeat.plist`:
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.homunculus.heartbeat</string>
+    <key>ProgramArguments</key>
+    <array>
+        <string>/path/to/your/project/heartbeat.sh</string>
+    </array>
+    <key>StartCalendarInterval</key>
+    <dict>
+        <key>Hour</key>
+        <integer>2</integer>
+        <key>Minute</key>
+        <integer>0</integer>
+    </dict>
+    <key>EnvironmentVariables</key>
+    <dict>
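+        <key>PATH</key>
+        <!-- launchd does not expand "~"; use the absolute path to your home directory -->
+        <string>/usr/local/bin:/usr/bin:/bin:/Users/YOUR_USERNAME/.local/bin:/opt/homebrew/bin</string>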
+    </dict>
+    <key>StandardOutPath</key>
+    <string>/tmp/homunculus-heartbeat.log</string>
+    <key>StandardErrorPath</key>
+    <string>/tmp/homunculus-heartbeat.log</string>
+</dict>
+</plist>
+```
+### 3. Load the agent
+```bash
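+# launchd executes the script directly, so it must be executable
+chmod +x /path/to/your/project/heartbeat.sh
+launchctl load ~/Library/LaunchAgents/com.homunculus.heartbeat.plist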
+```
+## Setup (Linux — cron)
+```bash
+# Run at 2 AM every night
+0 2 * * * cd /path/to/your/project && bash heartbeat.sh
+```
+> Note for macOS users: cron jobs do not have access to the macOS Keychain. If you are on macOS and your Claude CLI authenticates via OAuth, use the launchd setup above instead of cron.
+## Budget Control
+The `--max-budget-usd` flag controls how much the nightly agent can spend per run. Start with `$2.00` and adjust based on your needs.
+## Morning Report
+After a successful run, the agent produces a report. You can configure it to:
+- Write to a file (`heartbeat/data/morning-report.md`)
+- Send to Discord via webhook
+- Push a desktop notification via `osascript`
+## Monitoring
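+Check if the agent ran. The `heartbeat.sh` script above redirects its own output to `heartbeat-YYYYMMDD.log` in the project directory; `/tmp/homunculus-heartbeat.log` (launchd only) receives just the output the script does not redirect, such as errors before the first log line:
+```bash
+# Last run time
+ls -la /tmp/homunculus-heartbeat.log
+# Recent output
+tail -50 /tmp/homunculus-heartbeat.log
+# Today's run log written by heartbeat.sh
+tail -50 /path/to/your/project/heartbeat-$(date +%Y%m%d).log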
+```
+## Advanced: Multi-Tick Heartbeat
+The reference implementation uses a more sophisticated heartbeat with:
+- **Priority-based task scheduling** (P0-P4)
+- **Budget tracking** across ticks
+- **Experiment queue** management
+- **Cross-tick progress** for long-running tasks
+See `examples/reference/` for the full implementation.

package/examples/reference/README.md ADDED Viewed

@@ -0,0 +1,47 @@
+# Reference Implementation
+This is a snapshot of a real Homunculus system after **15 days of evolution** (1,235 commits).
+## What's Here
+```
+reference/
+├── architecture.yaml          # Real goal tree (9 goals, 46+ sub-goals)
+├── evolved-skills/            # 7 evolved skills (all 100% eval pass)
+│   ├── api-system-diagnosis.md
+│   ├── assistant-system-management.md
+│   ├── claude-code-reference.md   (not included in this snapshot — only its eval spec is)
+│   ├── development-verification-patterns.md
+│   ├── multi-agent-design-patterns.md
+│   ├── shell-automation-patterns.md
+│   ├── tdd-workflow.md
+│   └── workflows.md
+├── evolved-agents/            # 3 specialized subagents
+│   ├── assistant-explorer.md  (Haiku — fast, read-only exploration)
+│   ├── shell-debugger.md      (Sonnet — shell script diagnosis)
+│   └── tdd-runner.md          (Sonnet — TDD red-green cycles)
+└── evolved-evals/             # 8 eval specs (93 total scenarios)
+```
+## Key Numbers
+| Metric | Value |
+|--------|-------|
+| System age | 15 days |
+| Total instincts generated | 168 (84 active + 84 auto-archived) |
+| Evolved skills | 7, all 100% eval pass |
+| Eval scenarios | 93 total |
+| Evolved agents | 3 |
+| Goal tree | 9 root goals, 46+ sub-goals |
+| Nightly agent commits | 134 across 11 nights |
+## How to Use This
+Browse these files to understand what a mature Homunculus system looks like. Key things to notice:
+1. **architecture.yaml** — See how goals cascade into sub-goals, each with `purpose`, `metrics`, `health_check`, and `realized_by`
+2. **Evolved skills** — See how instincts converge into tested knowledge modules
+3. **Eval specs** — See how scenarios test skills with expected behaviors and anti-patterns
+4. **Agents** — See how specialized subagents are defined with model choice and tool restrictions
+Your system will evolve differently based on your goals and usage patterns. This is just one possible outcome.