homunculus-code 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35):
  1. package/CONTRIBUTING.md +56 -0
  2. package/LICENSE +21 -0
  3. package/README.md +443 -0
  4. package/bin/init.js +317 -0
  5. package/commands/eval-skill.md +48 -0
  6. package/commands/evolve.md +67 -0
  7. package/commands/improve-skill.md +50 -0
  8. package/core/evaluate-session.js +173 -0
  9. package/core/observe.sh +51 -0
  10. package/core/prune-instincts.js +159 -0
  11. package/docs/nightly-agent.md +130 -0
  12. package/examples/reference/README.md +47 -0
  13. package/examples/reference/architecture.yaml +886 -0
  14. package/examples/reference/evolved-agents/assistant-explorer.md +86 -0
  15. package/examples/reference/evolved-agents/shell-debugger.md +108 -0
  16. package/examples/reference/evolved-agents/tdd-runner.md +112 -0
  17. package/examples/reference/evolved-evals/api-system-diagnosis.eval.yaml +125 -0
  18. package/examples/reference/evolved-evals/assistant-system-management.eval.yaml +123 -0
  19. package/examples/reference/evolved-evals/claude-code-reference.eval.yaml +394 -0
  20. package/examples/reference/evolved-evals/development-verification-patterns.eval.yaml +117 -0
  21. package/examples/reference/evolved-evals/multi-agent-design-patterns.eval.yaml +151 -0
  22. package/examples/reference/evolved-evals/shell-automation-patterns.eval.yaml +209 -0
  23. package/examples/reference/evolved-evals/tdd-workflow.eval.yaml +191 -0
  24. package/examples/reference/evolved-evals/workflows.eval.yaml +148 -0
  25. package/examples/reference/evolved-skills/api-system-diagnosis.md +234 -0
  26. package/examples/reference/evolved-skills/assistant-system-management.md +199 -0
  27. package/examples/reference/evolved-skills/development-verification-patterns.md +243 -0
  28. package/examples/reference/evolved-skills/multi-agent-design-patterns.md +259 -0
  29. package/examples/reference/evolved-skills/shell-automation-patterns.md +347 -0
  30. package/examples/reference/evolved-skills/tdd-workflow.md +272 -0
  31. package/examples/reference/evolved-skills/workflows.md +237 -0
  32. package/package.json +25 -0
  33. package/templates/CLAUDE.md.template +36 -0
  34. package/templates/architecture.template.yaml +41 -0
  35. package/templates/rules/evolution-system.md +29 -0
@@ -0,0 +1,886 @@
1
+ # architecture.yaml — Goal-oriented system manifest
2
+ # Each node is a GOAL, not a system. Systems are means to achieve goals.
3
+ # Used by evolution system: assess each goal independently,
4
+ # find better implementations, evaluate new research against goals.
5
+ #
6
+ # Node schema:
7
+ # purpose: Why this goal exists (serves which parent goal)
8
+ # realized_by: What implements it (path, system, or description)
9
+ # metrics: How to measure success [{name, source, healthy}]
10
+ # tests: Leaf-level verification [test file patterns]
11
+ # goals: Sub-goals (recursive)
12
+ # agents: Subagents that serve this goal [{name, role, trigger}]
13
+ # - name: agent definition filename (without .md)
14
+ # - role: "specialist" (spawned on demand) | "autonomous" (cron/heartbeat triggered)
15
+ # - trigger: what causes this agent to run
16
+ # health_check: Machine-executable health verification
17
+ # - command: shell command to run (exit 0 = healthy)
18
+ # - expected: human-readable description of healthy state
19
+ # Used by /harness-audit to scan all goals programmatically.
20
+ #
21
+ # ADR: When making significant changes (add/remove goals, change realized_by,
22
+ # restructure agents), write an ADR to decisions/YYYY-MM-DD-title.yaml.
23
+ # Use scripts/goal-impact.sh to check affected goals before changing.
24
+ #
25
+ # Last updated: 2026-03-20
26
+
27
+ version: "2.2"
28
+
29
+ # ═══════════════════════════════════════════════
30
+ # Root: the single top-level aspiration
31
+ # ═══════════════════════════════════════════════
32
+ root:
33
+ purpose: "A personal AI assistant that continuously evolves and acts autonomously"
34
+ goals:
35
+
36
+ # ───────────────────────────────────────────
37
+ # 1. Autonomous Action
38
+ # ───────────────────────────────────────────
39
+ autonomous_action:
40
+ purpose: "Act without human trigger — scheduled tasks, proactive maintenance, research"
41
+ realized_by: heartbeat/ + shell-automation-patterns skill
42
+
43
+ goals:
44
+ scheduled_dispatch:
45
+ purpose: "Time-aware task routing (day checks vs night agent)"
46
+ realized_by: heartbeat/heartbeat.sh + heartbeat/tests/heartbeat.test.sh + /api/sleep (sleep-flag trigger) + /api/night-status + /api/bedtime-preview
47
+ metrics:
48
+ - name: dispatch_accuracy
49
+ source: heartbeat/logs/heartbeat.log
50
+ healthy: "correct mode selected (day/night) 100%"
51
+
52
+ night_research:
53
+ purpose: "Autonomous knowledge acquisition during idle hours"
54
+ realized_by: heartbeat/prompts/p2.md + .envrc (direnv worktree isolation)
55
+ goals:
56
+ topic_discovery:
57
+ purpose: "Find relevant research topics from news, HN, blogs"
58
+ realized_by: heartbeat/prompts/p2.md
59
+ metrics:
60
+ - name: topics_per_night
61
+ source: heartbeat/data/night-report.md
62
+ healthy: ">= 3"
63
+ structured_reporting:
64
+ purpose: "Convert raw research into actionable 6-section report"
65
+ realized_by: heartbeat/scripts/summarize-night.sh
66
+ metrics:
67
+ - name: report_completeness
68
+ source: heartbeat/data/night-report.md
69
+ healthy: "all 6 sections present"
70
+ action_generation:
71
+ purpose: "Propose concrete next steps from research findings"
72
+ realized_by: heartbeat/prompts/shared.md (建議行動 / "suggested actions" guidelines)
73
+ metrics:
74
+ - name: suggestion_adopt_rate
75
+ source: quest-board state.json (suggested tasks confirmed vs dismissed)
76
+ healthy: ">= 50%"
77
+
78
+ system_maintenance:
79
+ purpose: "Keep infrastructure healthy without human intervention"
80
+ realized_by: heartbeat/checks/ + heartbeat/checks/harness-audit.sh (weekly)
81
+ agents:
82
+ - name: shell-debugger
83
+ role: specialist
84
+ trigger: "shell script failure or unexpected output"
85
+ goals:
86
+ server_monitoring:
87
+ purpose: "Detect and report quest-board server issues"
88
+ realized_by: heartbeat/checks/server-health.sh
89
+ health_check:
90
+ command: "curl -sf http://localhost:3000/api/health | jq -e .ok"
91
+ expected: "quest-board responds 200 with {ok:true}"
92
+ metrics:
93
+ - name: server_uptime
94
+ source: /api/health
95
+ healthy: "responds 200"
96
+ stats_snapshots:
97
+ purpose: "Daily data capture for trend analysis"
98
+ realized_by: heartbeat/checks/system-maintenance.sh
99
+ health_check:
100
+ command: "ls ~/assistant/quest-board/data/stats-history/$(date +%Y-%m-%d)*.json 2>/dev/null | head -1 | grep ."
101
+ expected: "today's snapshot file exists"
102
+ metrics:
103
+ - name: snapshot_freshness
104
+ source: quest-board/data/stats-history/
105
+ healthy: "snapshot exists for today"
106
+ search_index:
107
+ purpose: "Keep QMD knowledge base current"
108
+ realized_by: heartbeat/checks/system-maintenance.sh (qmd update+embed)
109
+ metrics:
110
+ - name: index_age
111
+ source: qmd status
112
+ healthy: "< 24h"
113
+ log_rotation:
114
+ purpose: "Prevent unbounded log growth"
115
+ realized_by: heartbeat/checks/system-maintenance.sh
116
+ metrics:
117
+ - name: log_size
118
+ source: heartbeat/logs/
119
+ healthy: "< 10MB per file"
120
+
121
+ scheduled_projects:
122
+ purpose: "Autonomous periodic projects that enrich user context"
123
+ realized_by: projects/
124
+ goals:
125
+ ai_trading_strategies:
126
+ purpose: "Scheduled autonomous strategy evolution runs"
127
+ realized_by: projects/ai-trading-strategies/evolve.sh (launchd)
128
+ # Full project details: personal_growth.active_projects.ai_trading_strategies
129
+ health_check:
130
+ command: "ls ~/assistant/projects/ai-trading-strategies/reports/$(date +%Y-%m-%d)-*.md 2>/dev/null | head -1 | grep ."
131
+ expected: "today's strategy report exists"
132
+
133
+ daily_news:
134
+ purpose: "Curate and deliver daily news digest from multiple sources"
135
+ realized_by: daily-news/
136
+ goals:
137
+ source_collection:
138
+ purpose: "Gather news from RSS, Twitter, Gmail newsletters"
139
+ realized_by: daily-news/sources/ (twitter.sh, rss-feeds.sh, gmail-newsletters.sh)
140
+ metrics:
141
+ - name: source_count
142
+ source: daily-news/config.json
143
+ healthy: ">= 3 active sources"
144
+ summarization:
145
+ purpose: "AI-summarize collected items into readable digest"
146
+ realized_by: daily-news/run.sh + daily-news/prompt.md
147
+ metrics:
148
+ - name: digest_freshness
149
+ source: daily-news/data/latest-summary.md
150
+ healthy: "generated within 24h"
151
+ twitter_prefetch:
152
+ purpose: "Pre-cache Twitter data for faster digest generation"
153
+ realized_by: daily-news/sources/twitter.sh prefetch (launchd)
154
+
155
+ notifications:
156
+ purpose: "Push timely reminders and reports to user"
157
+ realized_by: heartbeat/checks/
158
+ goals:
159
+ task_reminders:
160
+ purpose: "Remind incomplete daily tasks at key hours"
161
+ realized_by: heartbeat/checks/quest-reminder.sh
162
+ metrics:
163
+ - name: reminder_timing
164
+ source: heartbeat/logs/heartbeat.log
165
+ healthy: "fires at 12/18/22h"
166
+ morning_report:
167
+ purpose: "Deliver night research results at wake time"
168
+ realized_by: heartbeat/checks/night-report.sh
169
+ metrics:
170
+ - name: report_delivered
171
+ source: Discord channel
172
+ healthy: "pushed by 08:30"
173
+
174
+ # ───────────────────────────────────────────
175
+ # 2. Continuous Evolution
176
+ # ───────────────────────────────────────────
177
+ continuous_evolution:
178
+ purpose: "Learn from every interaction and improve over time"
179
+ realized_by: homunculus/ + .claude/rules/evolution-system.md
180
+
181
+ goals:
182
+ behavior_observation:
183
+ purpose: "Capture raw tool usage patterns for later analysis"
184
+ realized_by: scripts/observe.sh
185
+ metrics:
186
+ - name: observation_rate
187
+ source: homunculus/observations.jsonl
188
+ healthy: "> 0 entries per session"
189
+
190
+ pattern_extraction:
191
+ purpose: "Distill observations into reusable behavioral patterns"
192
+ realized_by: scripts/evaluate-session.js + /learn + /learn-eval + /instinct-status commands
193
+ health_check:
194
+ command: "count=$(ls ~/assistant/homunculus/instincts/personal/*.md 2>/dev/null | wc -l); [ $count -ge 30 ] && [ $count -le 90 ]"
195
+ expected: "active instincts between 30-90"
196
+ metrics:
197
+ - name: active_instincts
198
+ source: homunculus/instincts/personal/
199
+ healthy: "30-90 (not too few, not bloated)"
200
+ - name: extraction_precision
201
+ source: instinct adoption (not immediately archived)
202
+ healthy: "> 80% survive 7 days"
203
+
204
+ skill_aggregation:
205
+ purpose: "Merge related instincts into tested, versioned skills"
206
+ realized_by: /evolve command + homunculus/evolved/skills/
207
+ health_check:
208
+ command: "count=$(ls ~/assistant/homunculus/evolved/skills/*.md 2>/dev/null | wc -l); [ $count -ge 5 ]"
209
+ expected: "at least 5 evolved skills exist"
210
+ metrics:
211
+ - name: skill_count
212
+ source: homunculus/evolved/skills/
213
+ healthy: ">= 5"
214
+ - name: avg_pass_rate
215
+ source: homunculus/evolved/evals/history.jsonl
216
+ healthy: ">= 90%"
217
+
218
+ eval_and_improve:
219
+ purpose: "Verify skill quality and iteratively improve"
220
+ realized_by: /eval-skill + /improve-skill commands + data/eval-config.json (auto-tuning)
221
+ metrics:
222
+ - name: eval_coverage
223
+ source: homunculus/evolved/evals/
224
+ healthy: "every skill has eval spec"
225
+ - name: improvement_convergence
226
+ source: homunculus/evolved/evals/history.jsonl
227
+ healthy: "reaches 100% within 5 rounds"
228
+ - name: eval_discrimination
229
+ source: homunculus/evolved/evals/history.jsonl
230
+ healthy: "> 30% (trend indicator — differences < 3pp not statistically significant)"
231
+ current: "30.3% (10/33)"
232
+
233
+ stale_cleanup:
234
+ purpose: "Remove outdated knowledge to keep signal-to-noise high"
235
+ realized_by: quest-board/stats.js (stale_instincts/stale_memories) + scripts/prune-instincts.js + heartbeat guidance
236
+ metrics:
237
+ - name: stale_instinct_ratio
238
+ source: /api/stats/references
239
+ healthy: "< 15%"
240
+ - name: stale_memory_ratio
241
+ source: /api/stats/references
242
+ healthy: "< 10%"
243
+ tests:
244
+ - quest-board/tests/r130.test.js
245
+
246
+ data_driven_decisions:
247
+ purpose: "Ground evolution choices in quantitative evidence"
248
+ realized_by: quest-board/stats.js + scripts/evolution-weekly-report.sh (launchd)
249
+ goals:
250
+ statistics_aggregation:
251
+ purpose: "Collect multi-source metrics into unified dashboard"
252
+ realized_by: quest-board/stats.js → /api/stats
253
+ tests:
254
+ - quest-board/tests/r91.test.js
255
+ trend_analysis:
256
+ purpose: "Compare current vs historical to detect drift"
257
+ realized_by: quest-board/stats.js → /api/stats/trends
258
+ metrics:
259
+ - name: trend_data_points
260
+ source: quest-board/data/stats-history/
261
+ healthy: ">= 7 daily snapshots"
262
+ reference_tracking:
263
+ purpose: "Know which knowledge is actually being used"
264
+ realized_by: scripts/observe.sh → data/reference-tracking.jsonl → /api/stats/references
265
+ tests:
266
+ - quest-board/tests/r130.test.js
267
+ - quest-board/tests/r136.test.js
268
+
269
+ experimentation:
270
+ purpose: "Test hypotheses safely before adopting changes"
271
+ realized_by: homunculus/experiments/ + /experiment command + git worktrees + .envrc
272
+ metrics:
273
+ - name: experiment_rate
274
+ source: homunculus/experiments/history.jsonl
275
+ healthy: ">= 1 per week"
276
+ - name: experiment_pass_rate
277
+ source: homunculus/experiments/history.jsonl
278
+ healthy: ">= 60%"
279
+
280
+ agent_evolution:
281
+ purpose: "Evolve subagent definitions based on performance data"
282
+ realized_by: homunculus/evolved/agents/ + /api/subagent/track + /api/subagent/stats
283
+ goals:
284
+ agent_registry:
285
+ purpose: "Track all agents with version, alignment, and metrics"
286
+ realized_by: homunculus/evolved/agents/ (metadata in frontmatter) + scripts/sync-agent-config.js (symlinks)
287
+ agent_evaluation:
288
+ purpose: "Verify agent effectiveness against defined scenarios"
289
+ realized_by: homunculus/evolved/evals/agents/ (placeholder — eval specs not yet written)
290
+ agent_optimization:
291
+ purpose: "Auto-adjust prompt/model/tools based on performance stats"
292
+ realized_by: /api/subagent/stats recommendations + heartbeat
293
+
294
+ # ───────────────────────────────────────────
295
+ # 3. Memory & Knowledge
296
+ # ───────────────────────────────────────────
297
+ memory_and_knowledge:
298
+ purpose: "Retain and retrieve relevant information across sessions"
299
+ agents:
300
+ - name: assistant-explorer
301
+ role: specialist
302
+ trigger: "exploring ~/assistant directory structure, checking system state"
303
+
304
+ goals:
305
+ cross_session_persistence:
306
+ purpose: "Remember user context, decisions, and preferences between conversations"
307
+ realized_by: ~/.claude/projects/-Users-jinx-assistant/memory/
308
+ metrics:
309
+ - name: memory_index_size
310
+ source: MEMORY.md
311
+ healthy: "< 200 lines"
312
+ - name: memory_freshness
313
+ source: MEMORY.md last-modified
314
+ healthy: "updated within 7 days"
315
+
316
+ semantic_search:
317
+ purpose: "Find relevant knowledge by meaning, not just keywords"
318
+ realized_by: QMD v2.0.1 (Bun, 6 collections, MCP server) + context7 MCP + scripts/qmd-contextual-enrichment.sh + data/lenny-data/
319
+ metrics:
320
+ - name: indexed_docs
321
+ source: qmd status
322
+ healthy: "> 100"
323
+ - name: search_relevance
324
+ source: manual assessment
325
+ healthy: "top-3 results contain answer > 80%"
326
+
327
+ memory_quality:
328
+ purpose: "Ensure stored memories are accurate, current, and well-organized"
329
+ realized_by: quest-board/stats.js (memory_ranking) + heartbeat guidance
330
+ goals:
331
+ ranking_and_reorg:
332
+ purpose: "Surface frequently-used memories, sink stale ones"
333
+ realized_by: /api/stats/references → memory_ranking
334
+ tests:
335
+ - quest-board/tests/r136.test.js
336
+ staleness_detection:
337
+ purpose: "Flag memories that reference outdated information"
338
+ realized_by: quest-board/stats.js (stale_memories) + heartbeat guidance
339
+ metrics:
340
+ - name: outdated_memory_count
341
+ source: /api/stats/references (stale_memories)
342
+ healthy: "0"
343
+
344
+ session_management:
345
+ purpose: "Track session history for pattern analysis and continuity"
346
+ realized_by: sessions/ + scripts/session-start.js + scripts/session-end.js
347
+ metrics:
348
+ - name: session_capture_rate
349
+ source: sessions/
350
+ healthy: "every session has summary"
351
+
352
+ # ───────────────────────────────────────────
353
+ # 4. Task Management
354
+ # ───────────────────────────────────────────
355
+ task_management:
356
+ purpose: "Track, prioritize, and complete work across all time horizons"
357
+ realized_by: quest-board/ + /todo command + .claude/rules/quest-system.md + .claude/rules/quest-board-api.md
358
+
359
+ goals:
360
+ daily_habits:
361
+ purpose: "Reinforce recurring positive behaviors with RPG rewards"
362
+ realized_by: /api/habit/* + state.json today.habits
363
+ tests:
364
+ - quest-board/tests/habit-api.test.js
365
+ metrics:
366
+ - name: daily_completion_rate
367
+ source: state.json
368
+ healthy: ">= 60%"
369
+
370
+ personal_quests:
371
+ purpose: "Track one-off personal tasks to completion"
372
+ realized_by: /api/quest/*
373
+ tests:
374
+ - quest-board/tests/quest-api.test.js
375
+
376
+ system_upgrades:
377
+ purpose: "Structured development workflow for system improvements"
378
+ realized_by: /api/forge/* + /api/forge/confirm + /api/forge/dismiss + /forge-dev command
379
+ tests:
380
+ - quest-board/tests/forge-api.test.js
381
+ - quest-board/tests/forge-advanced-api.test.js
382
+ metrics:
383
+ - name: forge_completion_rate
384
+ source: /api/stats/forge
385
+ healthy: ">= 50%"
386
+ - name: review_pass_rate
387
+ source: /api/forge/metrics
388
+ healthy: ">= 80%"
389
+
390
+ focus_timer:
391
+ purpose: "Pomodoro-style focus sessions for time-boxed work"
392
+ realized_by: /api/timer (GET/POST/DELETE) + quest-board/data/timer.json
393
+
394
+ daily_refresh:
395
+ purpose: "Reset daily habits and clean up ephemeral state each morning"
396
+ realized_by: quest-board/refresh.sh (launchd)
397
+
398
+ stats_and_insights:
399
+ purpose: "Aggregate cross-system metrics for informed decisions"
400
+ realized_by: quest-board/stats.js
401
+ tests:
402
+ - quest-board/tests/r91.test.js
403
+ - quest-board/tests/r92.test.js
404
+
405
+ # ───────────────────────────────────────────
406
+ # 5. Communication
407
+ # ───────────────────────────────────────────
408
+ communication:
409
+ purpose: "Interact with user through multiple channels and modalities"
410
+
411
+ goals:
412
+ discord:
413
+ purpose: "Real-time text communication with per-topic context"
414
+ realized_by: bridge/ (Go)
415
+ goals:
416
+ multi_channel_chat:
417
+ purpose: "Per-channel conversations with dedicated system prompts"
418
+ realized_by: bridge/ + config.toml (per-channel system_prompt_file)
419
+ health_check:
420
+ command: "pgrep -f 'assistant/bridge/bridge' > /dev/null"
421
+ expected: "discord bridge process is running"
422
+ metrics:
423
+ - name: bridge_uptime
424
+ source: bridge process
425
+ healthy: "running"
426
+ - name: channel_count
427
+ source: bridge/config.toml
428
+ healthy: ">= 5"
429
+ remote_control:
430
+ purpose: "Accept and execute commands via Discord messages"
431
+ realized_by: bridge/core/engine.go (RC sessions) + claude-rc-wrapper.sh (launchd)
432
+ metrics:
433
+ - name: rc_response_time
434
+ source: Discord timestamps
435
+ healthy: "< 30s"
436
+ url_summarization:
437
+ purpose: "Auto-summarize URLs shared in chat"
438
+ realized_by: bridge/core/engine.go (Haiku + WebFetch)
439
+ metrics:
440
+ - name: summarization_success
441
+ source: bridge logs
442
+ healthy: "> 90%"
443
+ notification_delivery:
444
+ purpose: "Push system notifications (reminders, reports) to user"
445
+ realized_by: scripts/discord-alert.sh + Discord webhooks (called by heartbeat checks)
446
+ metrics:
447
+ - name: delivery_success
448
+ source: heartbeat logs
449
+ healthy: "> 95%"
450
+
451
+ dashboard:
452
+ purpose: "Visual status overview — make system state glanceable"
453
+ realized_by: quest-board/web/
454
+ goals:
455
+ task_visualization:
456
+ purpose: "Show habits, quests, forge tasks in RPG-style interface"
457
+ realized_by: quest-board/web/app.js
458
+ metrics:
459
+ - name: dashboard_uptime
460
+ source: /api/health
461
+ healthy: "responds 200"
462
+ progress_feedback:
463
+ purpose: "Make progress tangible through XP, levels, stats"
464
+ realized_by: quest-board/web/ (player panel)
465
+ stats_dashboard:
466
+ purpose: "Surface evolution health, trends, recommendations"
467
+ realized_by: quest-board/web/ (report tab) + onui MCP + /api/skills + /api/instinct/:id + /api/news + /api/night-report
468
+
469
+ cli:
470
+ purpose: "Direct terminal interaction for development and system work"
471
+ realized_by: Claude Code CLI + .claude/settings.json hooks + claude-code-reference skill + /tips command + Notification(idle_prompt) hook + data/hook-profile-config.json + .claude/rules/core-patterns.md + .claude/rules/claude-code-features.md
472
+ goals:
473
+ session_lifecycle:
474
+ purpose: "Initialize and finalize sessions with proper state management"
475
+ realized_by: scripts/session-start.js (SessionStart) + scripts/session-end.js (Stop) + scripts/auto-commit.sh (Stop)
476
+ behavior_hooks:
477
+ purpose: "Observe tool usage and suggest optimizations in real-time"
478
+ realized_by: scripts/observe.sh (PreToolUse/PostToolUse) + scripts/suggest-compact.sh (PreToolUse Edit|Write)
479
+ context_hooks:
480
+ purpose: "Preserve and restore context across compactions"
481
+ realized_by: scripts/pre-compact.sh (PreCompact) + scripts/post-compact.sh (PostCompact)
482
+ instruction_tracking:
483
+ purpose: "Track which CLAUDE.md/rules files are loaded per session"
484
+ realized_by: scripts/observe-instructions.sh (InstructionsLoaded)
485
+ metrics:
486
+ - name: hook_error_rate
487
+ source: hook execution logs
488
+ healthy: "0 errors"
489
+
490
+ # ───────────────────────────────────────────
491
+ # 6. Resource Awareness
492
+ # ───────────────────────────────────────────
493
+ resource_awareness:
494
+ purpose: "Know what resources are available and use them efficiently"
495
+
496
+ goals:
497
+ api_budget:
498
+ purpose: "Ensure must-do tasks always run; use remaining budget for research and experiments"
499
+ # Design philosophy (Claude Code Max subscription):
500
+ # - P0+P1 are non-negotiable — always run even over budget
501
+ # - P2 research + P3 experiments scale with available budget
502
+ # - No hard USD limits; Claude Code behaves well enough
503
+ # - Cross-tick budget awareness: later ticks detect increased budget
504
+ realized_by: heartbeat/heartbeat.sh (budget logic) + quest-board/data/usage-cache.json
505
+ metrics:
506
+ - name: weekly_usage
507
+ source: /api/usage
508
+ healthy: "within weekly budget curve (hours_into_week * 100 / 168 + 5%)"
509
+ - name: must_do_completion
510
+ source: heartbeat/data/night-report.md
511
+ healthy: "P0+P1 complete every night regardless of budget"
512
+ goals:
513
+ usage_tracking:
514
+ purpose: "Real-time visibility into API consumption"
515
+ realized_by: heartbeat/check-usage.js + quest-board/data/usage-cache.json
516
+ night_budget_scaling:
517
+ purpose: "MP/HP dual-layer budget: MP(5h hard limit) + HP(7d soft budget) → BUDGET_LEVEL label"
518
+ realized_by: heartbeat/heartbeat.sh (check_usage_budget + should_skip_phase)
519
+ # MP>=90% → mp_empty(skip tick), HP remaining<0 → skip, <2% → half, >=2% → full
520
+ cross_tick_progress:
521
+ purpose: "Resume phase pipeline across heartbeat ticks"
522
+ realized_by: heartbeat/data/night-progress.json (phases_completed/skipped/failed)
523
+
524
+ compute:
525
+ purpose: "Utilize local hardware effectively (MacBook Air, full machine access)"
526
+ realized_by: macOS + launchd + local processes + mac-use MCP
527
+ goals:
528
+ local_services:
529
+ purpose: "Manage always-on local services"
530
+ realized_by: launchd (heartbeat, bridge, quest-board, daily-news, trading, jarvis-dashboard, cloudflared) + node
531
+ metrics:
532
+ - name: service_count
533
+ source: launchctl list
534
+ healthy: "heartbeat + bridge running"
535
+ background_execution:
536
+ purpose: "Run long tasks without blocking user interaction"
537
+ realized_by: tmux + background agents + launchd
538
+ metrics:
539
+ - name: resource_utilization
540
+ source: system metrics
541
+ healthy: "CPU < 80% sustained"
542
+
543
+ accounts:
544
+ purpose: "Leverage external service accounts for expanded capabilities"
545
+ goals:
546
+ email:
547
+ purpose: "Send/receive email on behalf of user"
548
+ realized_by: "gog gmail (starpincer@gmail.com)"
549
+ metrics:
550
+ - name: email_accessible
551
+ source: "gog gmail list --max 1"
552
+ healthy: "responds without error"
553
+ github:
554
+ purpose: "Manage repos, PRs, issues"
555
+ realized_by: "gh CLI"
556
+ metrics:
557
+ - name: gh_authenticated
558
+ source: "gh auth status"
559
+ healthy: "logged in"
560
+ social:
561
+ purpose: "Post and read social media (Twitter/X)"
562
+ realized_by: planned (Twitter/X account access)
563
+ metrics:
564
+ - name: social_accessible
565
+ source: planned
566
+ healthy: "can post and read"
567
+ discord:
568
+ purpose: "Bot account for bridge communication"
569
+ realized_by: bridge/config.toml (bot token)
570
+ metrics:
571
+ - name: bot_connected
572
+ source: bridge process
573
+ healthy: "connected to gateway"
574
+
575
+ storage:
576
+ purpose: "Manage disk space and data lifecycle"
577
+ realized_by: macOS filesystem
578
+ metrics:
579
+ - name: disk_free
580
+ source: "df -h"
581
+ healthy: "> 10GB free"
582
+ - name: git_repo_size
583
+ source: "du -sh ~/assistant/.git"
584
+ healthy: "< 500MB"
585
+
586
+ # ───────────────────────────────────────────
587
+ # 7. Development Quality
588
+ # ───────────────────────────────────────────
589
+ development_quality:
590
+ purpose: "Ensure changes are safe, tested, and automatically tracked"
591
+ realized_by: api-system-diagnosis skill
592
+
593
+ goals:
594
+ test_infrastructure:
595
+ purpose: "Fast, isolated, reliable test suite for quest-board"
596
+ realized_by: quest-board/tests/ + tdd-workflow skill
597
+ agents:
598
+ - name: tdd-runner
599
+ role: specialist
600
+ trigger: "forge-dev gen-tests / forge-dev start"
601
+ health_check:
602
+ command: "cd $HOME/assistant/quest-board && node --experimental-test-isolation=none --test --test-concurrency=1 $(ls tests/*.test.js) 2>&1 | grep -q '# fail 0'"
603
+ expected: "all quest-board tests pass (0 failures)"
604
+ metrics:
605
+ - name: test_count
606
+ source: quest-board/tests/*.test.js
607
+ healthy: ">= 99"
608
+ - name: pass_rate
609
+ source: test runner output
610
+ healthy: "100%"
611
+ - name: execution_time
612
+ source: test runner output
613
+ healthy: "< 10s"
614
+ tests:
615
+ - quest-board/tests/smoke.test.js
616
+
617
+ quality_gates:
618
+ purpose: "Pre-flight checks before risky operations"
619
+ realized_by: /quality-gate command
620
+ metrics:
621
+ - name: gate_bypass_rate
622
+ source: manual observation
623
+ healthy: "0%"
624
+
625
+ auto_versioning:
626
+ purpose: "Track all evolution changes in git automatically"
627
+ realized_by: scripts/auto-commit.sh
628
+ metrics:
629
+ - name: uncommitted_evolution_files
630
+ source: git status
631
+ healthy: "0"
632
+
633
+ context_management:
634
+ purpose: "Maintain healthy context window throughout sessions"
635
+ realized_by: scripts/suggest-compact.sh + scripts/pre-compact.sh
636
+ metrics:
637
+ - name: compact_data_loss
638
+ source: manual observation
639
+ healthy: "0 critical state lost"
640
+
641
+ # ───────────────────────────────────────────
642
+ # 8. Operational Intelligence
643
+ # ───────────────────────────────────────────
644
+ operational_intelligence:
645
+ purpose: "Choose the right action pattern for any given context"
646
+ realized_by: homunculus/evolved/skills/workflows.md
647
+
648
+ goals:
649
+ workflow_selection:
650
+ purpose: "Match incoming request to the correct workflow (research/dev/debug/review/...)"
651
+ realized_by: homunculus/evolved/skills/workflows.md (7 workflows + exclusion rules)
652
+ metrics:
653
+ - name: workflow_eval_pass_rate
654
+ source: homunculus/evolved/evals/workflows.eval.yaml
655
+ healthy: ">= 90%"
656
+ - name: wrong_workflow_rate
657
+ source: manual observation
658
+ healthy: "< 5%"
659
+
660
+ workflow_adaptation:
661
+ purpose: "Automatically adjust workflows based on usage data (skip rates, failure patterns)"
662
+ realized_by: "/api/workflow/track + /api/workflow/stats (recommendations: skip_rate > 50% triggers make_optional/remove)"
663
+ metrics:
664
+ - name: step_skip_rate
665
+ source: /api/workflow/stats
666
+ healthy: "no step skipped > 50% of the time"
667
+
668
+ context_routing:
669
+ purpose: "Route requests to appropriate tools, models, and subagents"
670
+ realized_by: CLAUDE.md model guidelines + subagent definitions + multi-agent-design-patterns skill
671
+ metrics:
672
+ - name: model_appropriateness
673
+ source: observations.jsonl (model usage vs task complexity)
674
+ healthy: "Sonnet ~90%, Opus ~8%, Haiku ~2%"
675
+
676
+ process_governance:
677
+ purpose: "Enforce quality gates and verification loops at the right moments"
678
+ realized_by: /quality-gate + /forge-dev + development-verification-patterns skill
679
+ metrics:
680
+ - name: gate_compliance
681
+ source: forge review pass_rate
682
+ healthy: ">= 80%"
683
+
684
+ # ───────────────────────────────────────────
685
+ # 9. Self-Awareness (meta)
686
+ # ───────────────────────────────────────────
687
+ self_awareness:
688
+ purpose: "Understand own architecture to make safe, informed evolution decisions"
689
+ realized_by: architecture.yaml (this file) + assistant-system-management skill
690
+ metrics:
691
+ - name: architecture_freshness
692
+ source: architecture.yaml last-modified
693
+ healthy: "reflects current system state"
694
+ goals:
695
+ goal_assessment:
696
+ purpose: "Evaluate each goal's health and find improvement opportunities"
697
+ realized_by: scripts/goal-health-check.sh + scripts/architecture-orphan-check.sh + /harness-audit + /health commands + quest-board/stats.js (computeEvolutionMetrics)
698
+ metrics:
699
+ - name: goals_with_metrics
700
+ source: architecture.yaml
701
+ healthy: "> 80% of leaf goals have metrics"
702
+ impact_analysis:
703
+ purpose: "Before changing a system, understand what goals it affects"
704
+ realized_by: scripts/goal-impact.sh + decisions/ (ADR)
705
+ metrics:
706
+ - name: safe_change_rate
707
+ source: git history (reverts, hotfixes)
708
+ healthy: "< 5% changes reverted"
709
+ architecture_review_triggers:
710
+ purpose: "Detect when incremental optimization is insufficient and architecture-level redesign is needed"
711
+ realized_by: scripts/architecture-review-triggers.sh + heartbeat/prompts/p1.md (Section G)
712
+ metrics:
713
+ - name: trigger_detection
714
+ source: scripts/architecture-review-triggers.sh output
715
+ healthy: "triggers are detected and surfaced in night report suggestions"
716
+
717
+ # ───────────────────────────────────────────
718
+ # 10. Personal Growth
719
+ # ───────────────────────────────────────────
720
+ personal_growth:
721
+ purpose: "Build knowledge and capabilities that lead to meaningful projects"
722
+
723
+ goals:
724
+ knowledge_base:  # Knowledge-card graph goal: five metrics plus one runnable health_check.
725
+ purpose: "Zettelkasten knowledge graph with typed connections and structure notes"
726
+ realized_by: knowledge-cards/ + scripts/build-index.js + /study command + .claude/rules/knowledge-management.md
727
+ metrics:  # Every metric below reads a `stats.*` field of knowledge-cards/index.json.
728
+ - name: card_count
729
+ source: knowledge-cards/index.json stats.total_cards
730
+ healthy: ">= 10"
731
+ - name: domain_coverage
732
+ source: knowledge-cards/index.json stats.domains
733
+ healthy: ">= 3 domains"
734
+ - name: connection_density
735
+ source: knowledge-cards/index.json stats.avg_connections
736
+ healthy: ">= 2.0 links/card"
737
+ - name: structure_note_count
738
+ source: knowledge-cards/index.json stats.total_structure_notes
739
+ healthy: ">= 3"
740
+ - name: orphan_count
741
+ source: knowledge-cards/index.json stats.orphans
742
+ healthy: "0"
743
+ health_check:  # Passes only when the build output contains 'index.json built'. NOTE(review): the command resolves scripts/build-index.js inside ~/assistant/knowledge-cards, while realized_by lists scripts/build-index.js alongside knowledge-cards/ — confirm where the script lives.
744
+ command: "cd ~/assistant/knowledge-cards && node scripts/build-index.js 2>&1 | grep -q 'index.json built'"
745
+ expected: "build-index.js runs successfully and produces index.json"
746
+
747
+ project_pipeline:  # Path from knowledge-card connections to project proposals via /create-project.
748
+ purpose: "From knowledge intersection to active project proposals"
749
+ realized_by: knowledge-cards/ connections + /create-project command
750
+ metrics:  # Single metric: review latency for items under knowledge-cards/candidates/.
751
+ - name: candidate_review_rate
752
+ source: knowledge-cards/candidates/
753
+ healthy: "candidates reviewed within 3 days"
754
+
755
+ active_projects:
756
+ purpose: "Track and advance projects that solve real problems"
757
+ realized_by: projects/
758
+ goals:
759
+ chapterly:  # Project entry; health is judged from the project's plan.md.
760
+ purpose: "iOS novel reading app — solve poor reading experience on mobile"
761
+ realized_by: projects/chapterly/
762
+ metrics:  # Qualitative metric: milestones must be defined and tracked in plan.md.
763
+ - name: project_status
764
+ source: projects/chapterly/plan.md
765
+ healthy: "milestones defined and tracked"
766
+ # Knowledge cards: reading-app-pain-point × ios-app-development
767
+
768
+ ai_trading_strategies:  # Project entry with two metrics: report recency and an operational backtest/evolution loop.
769
+ purpose: "AI autonomous strategy discovery — no predefined indicators, evolve and validate"
770
+ realized_by: projects/ai-trading-strategies/
771
+ metrics:  # report_freshness has a 24h window; strategy_evolution is a qualitative check.
772
+ - name: report_freshness
773
+ source: projects/ai-trading-strategies/reports/
774
+ healthy: "report generated within 24h"
775
+ - name: strategy_evolution
776
+ source: projects/ai-trading-strategies/
777
+ healthy: "backtest + evolution loop operational"
778
+ # Knowledge cards: ai-strategy-evolution × binance-api × tradingview-api × blockchain-defi-experience
779
+ # Also under: autonomous_action.scheduled_projects (scheduling)
780
+
781
+ manual_trading_strategies:  # Human-driven counterpart to ai_trading_strategies; progress is read from the project README.
782
+ purpose: "Human hypothesis-driven strategy backtesting with custom indicators and position sizing"
783
+ realized_by: projects/manual-trading-strategies/
784
+ metrics:  # Healthy once at least one hypothesis has been backtested.
785
+ - name: hypotheses_tested
786
+ source: projects/manual-trading-strategies/README.md
787
+ healthy: "at least 1 hypothesis backtested"
788
+ # Knowledge cards: binance-api × ai-crypto-analysis × tradingview-api
789
+ # Complements: ai_trading_strategies (AI zero-knowledge) vs this (human intuition)
790
+
791
+ ai_trading_executor:  # Executes graduated strategies via two launchd jobs; checked by a launchd probe and by recorded paper trades.
792
+ purpose: "Paper/live trading — execute graduated strategies on real markets"
793
+ realized_by: projects/ai-trading-executor/ + launchd(trading-executor, trading-executor-daily)
794
+ metrics:  # scanner_active relies on the exit status of `launchctl print`, which is non-zero when the service is not loaded, so the check passes whether the job is idle or currently executing. gui/501 hard-codes the owning user's UID.
795
+ - name: scanner_active
796
+ command: "launchctl print gui/501/com.jassistant.trading-executor >/dev/null 2>&1"
797
+ expected: "launchd job loaded"
798
+ - name: paper_trades
799
+ source: projects/ai-trading-executor/data/trades/
800
+ healthy: "paper trades being recorded"
801
+ # Knowledge cards: binance-api × ai-strategy-evolution × blockchain-defi-experience
802
+ # Depends on: ai_trading_strategies (graduated strategies)
803
+
804
+ self_evolution:  # Project entry for the self-improving assistant; progress is read from plan.md.
805
+ purpose: "Build a self-improving AI assistant system"
806
+ realized_by: projects/self-evolution/
807
+ metrics:  # Qualitative metric: active milestones must be tracked in the plan.
808
+ - name: plan_progress
809
+ source: projects/self-evolution/plan.md
810
+ healthy: "active milestones tracked"
811
+ # Knowledge cards: ai-self-evolution × ai-agent-tooling-gap
812
+
813
+ jarvis_dashboard:  # Dashboard project run under launchd; health is whether its process is running.
814
+ purpose: "3D knowledge graph visualization — make system topology explorable"
815
+ realized_by: projects/jarvis-dashboard/ (launchd)
816
+ metrics:  # NOTE(review): the label prefix here is com.jinx, while ai_trading_executor's launchd check uses com.jassistant — confirm the dashboard's actual launchd label.
817
+ - name: dashboard_running
818
+ source: launchctl list com.jinx.jarvis-dashboard
819
+ healthy: "process running"
820
+
821
+ homunculus_oss:  # Open-source track; has no metrics of its own, only the sub-goals that follow `goals:`.
822
+ purpose: "Open-source self-evolving AI assistant + influence building"
823
+ realized_by: projects/homunculus/
824
+
825
+ goals:  # Sub-goals: open_source_repo, upstream_sync, community, influence, monetization.
826
+ open_source_repo:
827
+ purpose: "Publish minimal seed repo with extreme low setup barrier"
828
+ realized_by: GitHub repo homunculus + npx init
829
+ metrics:  # NOTE(review): `{owner}` in the gh api source looks like an unfilled placeholder — confirm how it is substituted. setup_time is measured by hand.
830
+ - name: github_stars
831
+ source: gh api repos/{owner}/homunculus
832
+ healthy: "> 100 (Phase 1), > 1000 (Phase 2)"
833
+ - name: setup_time
834
+ source: manual test
835
+ healthy: "< 5 minutes from clone to first evolution"
836
+
837
+ upstream_sync:  # Currently a manual process; realized_by names an automation script as future work.
838
+ purpose: "Keep homunculus repo updated with self-evolution improvements"
839
+ realized_by: manual review + future automation script
840
+ metrics:
841
+ - name: sync_lag
842
+ source: git diff between internal and public repo
843
+ healthy: "< 2 weeks behind self-evolution core changes"
844
+
845
+ community:
846
+ purpose: "Build and maintain developer community around homunculus"
847
+ realized_by: GitHub discussions + Twitter + Reddit + blog + nightly community monitoring
848
+ metrics:  # weekly_content counts drafts under projects/homunculus/drafts/; response_time is read from GitHub issues.
849
+ - name: weekly_content
850
+ source: projects/homunculus/drafts/
851
+ healthy: ">= 2 posts per week (auto-drafted by assistant)"
852
+ - name: response_time
853
+ source: GitHub issues
854
+ healthy: "< 48h for first response"
855
+
856
+ influence:
857
+ purpose: "Establish personal authority in AI deep-usage space"
858
+ realized_by: content strategy + community engagement
859
+ metrics:  # Both sources are external (Twitter/X profile, nightly HN scan), not files in the repo.
860
+ - name: twitter_followers
861
+ source: Twitter/X profile
862
+ healthy: "> 500 (Phase 2)"
863
+ - name: hn_appearances
864
+ source: nightly agent HN scan
865
+ healthy: ">= 1 front page"
866
+
867
+ monetization:  # No metrics defined; the status comment below marks it as not started.
868
+ purpose: "Explore knowledge monetization paths based on audience feedback"
869
+ realized_by: TBD (course / consulting / product / newsletter)
870
+ # Status: Phase 3 — not started, depends on community traction
871
+
872
+ xiao_j_evolution:  # Archived project kept for reference; it defines no metrics.
873
+ purpose: "Phase 1 character evolution plan (archived — absorbed into self-evolution + knowledge-cards)"
874
+ realized_by: projects/xiao-j-evolution/
875
+ # Status: archived 2026-03-18
876
+
877
+
878
+ # ═══════════════════════════════════════════════
879
+ # Test runner configuration
880
+ # ═══════════════════════════════════════════════
881
+ test_config:  # Test-runner settings; quest_board is the only suite configured here.
882
+ quest_board:  # Runs tests/*.test.js with Node's built-in test runner from quest-board/, with test isolation set to none and concurrency 1. NOTE(review): `count` is presumably the expected number of tests — confirm how the runner uses it.
883
+ command: "node --experimental-test-isolation=none --test --test-concurrency=1 tests/*.test.js"
884
+ cwd: quest-board/
885
+ count: 125
886
+ expected_duration: "< 10s"