instar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/.claude/settings.local.json +7 -0
  2. package/.claude/skills/setup-wizard/skill.md +343 -0
  3. package/.github/workflows/ci.yml +78 -0
  4. package/CLAUDE.md +82 -0
  5. package/README.md +194 -0
  6. package/dist/cli.d.ts +18 -0
  7. package/dist/cli.js +141 -0
  8. package/dist/commands/init.d.ts +40 -0
  9. package/dist/commands/init.js +568 -0
  10. package/dist/commands/job.d.ts +20 -0
  11. package/dist/commands/job.js +84 -0
  12. package/dist/commands/server.d.ts +19 -0
  13. package/dist/commands/server.js +273 -0
  14. package/dist/commands/setup.d.ts +24 -0
  15. package/dist/commands/setup.js +865 -0
  16. package/dist/commands/status.d.ts +11 -0
  17. package/dist/commands/status.js +114 -0
  18. package/dist/commands/user.d.ts +17 -0
  19. package/dist/commands/user.js +53 -0
  20. package/dist/core/Config.d.ts +16 -0
  21. package/dist/core/Config.js +144 -0
  22. package/dist/core/Prerequisites.d.ts +28 -0
  23. package/dist/core/Prerequisites.js +159 -0
  24. package/dist/core/RelationshipManager.d.ts +73 -0
  25. package/dist/core/RelationshipManager.js +318 -0
  26. package/dist/core/SessionManager.d.ts +89 -0
  27. package/dist/core/SessionManager.js +326 -0
  28. package/dist/core/StateManager.d.ts +28 -0
  29. package/dist/core/StateManager.js +96 -0
  30. package/dist/core/types.d.ts +279 -0
  31. package/dist/core/types.js +8 -0
  32. package/dist/index.d.ts +18 -0
  33. package/dist/index.js +23 -0
  34. package/dist/messaging/TelegramAdapter.d.ts +73 -0
  35. package/dist/messaging/TelegramAdapter.js +288 -0
  36. package/dist/monitoring/HealthChecker.d.ts +38 -0
  37. package/dist/monitoring/HealthChecker.js +148 -0
  38. package/dist/scaffold/bootstrap.d.ts +21 -0
  39. package/dist/scaffold/bootstrap.js +110 -0
  40. package/dist/scaffold/templates.d.ts +34 -0
  41. package/dist/scaffold/templates.js +187 -0
  42. package/dist/scheduler/JobLoader.d.ts +18 -0
  43. package/dist/scheduler/JobLoader.js +70 -0
  44. package/dist/scheduler/JobScheduler.d.ts +111 -0
  45. package/dist/scheduler/JobScheduler.js +402 -0
  46. package/dist/server/AgentServer.d.ts +40 -0
  47. package/dist/server/AgentServer.js +73 -0
  48. package/dist/server/middleware.d.ts +12 -0
  49. package/dist/server/middleware.js +50 -0
  50. package/dist/server/routes.d.ts +25 -0
  51. package/dist/server/routes.js +224 -0
  52. package/dist/users/UserManager.d.ts +45 -0
  53. package/dist/users/UserManager.js +113 -0
  54. package/docs/dawn-audit-report.md +412 -0
  55. package/docs/positioning-vs-openclaw.md +246 -0
  56. package/package.json +52 -0
  57. package/src/cli.ts +169 -0
  58. package/src/commands/init.ts +654 -0
  59. package/src/commands/job.ts +110 -0
  60. package/src/commands/server.ts +325 -0
  61. package/src/commands/setup.ts +958 -0
  62. package/src/commands/status.ts +125 -0
  63. package/src/commands/user.ts +71 -0
  64. package/src/core/Config.ts +161 -0
  65. package/src/core/Prerequisites.ts +187 -0
  66. package/src/core/RelationshipManager.ts +366 -0
  67. package/src/core/SessionManager.ts +385 -0
  68. package/src/core/StateManager.ts +121 -0
  69. package/src/core/types.ts +320 -0
  70. package/src/index.ts +58 -0
  71. package/src/messaging/TelegramAdapter.ts +365 -0
  72. package/src/monitoring/HealthChecker.ts +172 -0
  73. package/src/scaffold/bootstrap.ts +122 -0
  74. package/src/scaffold/templates.ts +204 -0
  75. package/src/scheduler/JobLoader.ts +85 -0
  76. package/src/scheduler/JobScheduler.ts +476 -0
  77. package/src/server/AgentServer.ts +93 -0
  78. package/src/server/middleware.ts +58 -0
  79. package/src/server/routes.ts +278 -0
  80. package/src/templates/default-jobs.json +47 -0
  81. package/src/templates/hooks/compaction-recovery.sh +23 -0
  82. package/src/templates/hooks/dangerous-command-guard.sh +35 -0
  83. package/src/templates/hooks/grounding-before-messaging.sh +22 -0
  84. package/src/templates/hooks/session-start.sh +37 -0
  85. package/src/templates/hooks/settings-template.json +45 -0
  86. package/src/templates/scripts/health-watchdog.sh +63 -0
  87. package/src/templates/scripts/telegram-reply.sh +54 -0
  88. package/src/users/UserManager.ts +129 -0
  89. package/tests/e2e/lifecycle.test.ts +376 -0
  90. package/tests/fixtures/test-repo/CLAUDE.md +3 -0
  91. package/tests/fixtures/test-repo/README.md +1 -0
  92. package/tests/helpers/setup.ts +209 -0
  93. package/tests/integration/fresh-install.test.ts +218 -0
  94. package/tests/integration/scheduler-basic.test.ts +109 -0
  95. package/tests/integration/server-full.test.ts +284 -0
  96. package/tests/integration/session-lifecycle.test.ts +181 -0
  97. package/tests/unit/Config.test.ts +22 -0
  98. package/tests/unit/HealthChecker.test.ts +168 -0
  99. package/tests/unit/JobLoader.test.ts +151 -0
  100. package/tests/unit/JobScheduler.test.ts +267 -0
  101. package/tests/unit/Prerequisites.test.ts +59 -0
  102. package/tests/unit/RelationshipManager.test.ts +345 -0
  103. package/tests/unit/StateManager.test.ts +143 -0
  104. package/tests/unit/TelegramAdapter.test.ts +165 -0
  105. package/tests/unit/UserManager.test.ts +131 -0
  106. package/tests/unit/bootstrap.test.ts +28 -0
  107. package/tests/unit/commands.test.ts +138 -0
  108. package/tests/unit/middleware.test.ts +92 -0
  109. package/tests/unit/relationship-routes.test.ts +131 -0
  110. package/tests/unit/scaffold-templates.test.ts +132 -0
  111. package/tests/unit/server.test.ts +163 -0
  112. package/tsconfig.json +20 -0
  113. package/vitest.config.ts +9 -0
  114. package/vitest.e2e.config.ts +9 -0
  115. package/vitest.integration.config.ts +9 -0
@@ -0,0 +1,412 @@
1
+ # Dawn-to-Instar Audit Report
2
+
3
+ **Date**: 2026-02-18
4
+ **Purpose**: Map what Dawn has built and discovered against instar's current state. Identify gaps and integration opportunities.
5
+
6
+ ---
7
+
8
+ ## Executive Summary
9
+
10
+ Dawn's infrastructure has evolved over months of real production use into a sophisticated autonomous agent system. Instar currently captures the **core skeleton** (server, scheduler, sessions, Telegram, identity files) but is missing many of the patterns that make Dawn actually *work* — the ones earned through real failures. This report maps 12 major capability areas, scores instar's coverage, and recommends which patterns to integrate.
11
+
12
+ **Overall Coverage**: Instar implements ~25% of Dawn's proven patterns. The remaining 75% represents months of battle-tested infrastructure that could transform instar from "persistent CLI" to "genuinely autonomous agent."
13
+
14
+ ---
15
+
16
+ ## 1. Job Scheduling & Execution
17
+
18
+ ### What Dawn Has (1,450 lines)
19
+ - **Parallel session tracking** with per-job tmux session management
20
+ - **Quota-aware gating**: Jobs are skipped when Claude subscription usage exceeds thresholds (60%/80%/95%)
21
+ - **Memory pressure monitoring**: System RAM tracked via `vm_stat`; jobs gated by memory state (normal/elevated/critical/emergency)
22
+ - **Post-job review system**: After a job completes, a review session is auto-spawned to evaluate the job's output quality
23
+ - **Job type classification**: `cron`, `on-demand`, `event-driven` with different scheduling behavior
24
+ - **Model tiering**: Jobs specify which Claude model to use (opus/sonnet/haiku) based on task complexity
25
+ - **Machine coordination**: Multi-machine awareness — jobs can be routed to specific machines
26
+ - **Missed job detection**: On startup, checks if any jobs are overdue by >1.5x their interval
27
+ - **JSONL session discovery**: Finds Claude's conversation logs to extract session outputs
28
+ - **Kill audit logging**: Every killed session is logged with reason and timestamp
29
+ - **Execution tracking in Portal DB**: Job executions synced to production database for dashboards
30
+
31
+ ### What Agent-Kit Has (383 lines)
32
+ - Cron scheduling via `croner`
33
+ - Serial queue with priority sorting
34
+ - Basic quota callback hook
35
+ - Missed job detection
36
+ - Job completion notifications via messenger
37
+
38
+ ### Gap Analysis
39
+ | Feature | Dawn | Agent-Kit | Priority |
40
+ |---------|------|-----------|----------|
41
+ | Quota-aware gating | Full OAuth + thresholds | Callback stub | **High** |
42
+ | Memory pressure | `vm_stat` polling | None | Medium |
43
+ | Post-job review | Auto-spawn review sessions | None | Medium |
44
+ | Model tiering | Per-job model selection | Implemented | Done |
45
+ | Kill audit logging | Full audit trail | None | Low |
46
+ | Parallel sessions | Tracked with limits | Serial queue | Medium |
47
+ | Machine coordination | Multi-machine routing | N/A (single machine) | Low |
48
+
49
+ ### Recommended Integrations
50
+ 1. **Quota tracking** (High): Add a `QuotaTracker` that reads Claude's OAuth usage API. Gate jobs by usage percentage. This prevents the agent from burning through its subscription.
51
+ 2. **Post-job review** (Medium): After job sessions complete, optionally spawn a review session that evaluates output quality. This is how Dawn catches bad job runs.
52
+ 3. **Memory pressure** (Medium): Simple system memory check before spawning sessions. Prevents OOM situations.
53
+
54
+ ---
55
+
56
+ ## 2. Session Management & Resilience
57
+
58
+ ### What Dawn Has
59
+ - **Session Reaper**: Background process that kills zombie sessions, with protected session list
60
+ - **Sleep/Wake detection**: Detects macOS sleep via timer drift, recovers cloudflared tunnels and tmux sessions on wake
61
+ - **JSONL mtime detection**: Uses Claude's conversation log file modification times to detect if sessions are still active (more reliable than `lsof`)
62
+ - **Session lifecycle hooks**: `session-start.py`, `session-boundary.py`, `session-lifecycle.py` — inject context and track state transitions
63
+ - **Context recovery on compaction**: When Claude's context window compresses, identity-grounding instructions are re-injected
64
+ - **Session reports**: Every session produces a structured report (goal, actions, outcomes, learnings)
65
+ - **Session history**: JSON registry tracking all sessions with IDs, timestamps, goals
66
+
67
+ ### What Agent-Kit Has
68
+ - Session spawning and monitoring
69
+ - Completion detection via output patterns
70
+ - Basic reaping of completed sessions
71
+ - Protected session list
72
+
73
+ ### Gap Analysis
74
+ | Feature | Dawn | Agent-Kit | Priority |
75
+ |---------|------|-----------|----------|
76
+ | Session Reaper (zombie detection) | Full with protected list | Basic reaping | **High** |
77
+ | Sleep/Wake recovery | Timer drift detection | None | Low |
78
+ | Session lifecycle hooks | 3 hook scripts | None | **High** |
79
+ | Session reports | Structured per-session | None | Medium |
80
+ | Compaction recovery | Identity re-injection | None | **High** |
81
+ | Session history registry | Full JSON tracking | Basic state file | Medium |
82
+
83
+ ### Recommended Integrations
84
+ 1. **Session lifecycle hooks** (High): Instar needs a hook system. On session start, inject identity context. On session end, capture learnings. This is how Dawn maintains continuity.
85
+ 2. **Compaction recovery** (High): When Claude's context compresses, the agent loses its identity. Dawn re-injects core identity. Instar should do the same via the identity files (AGENT.md, USER.md).
86
+ 3. **Session reporting** (Medium): Each session should produce a brief report. This becomes the agent's memory of what it did.
87
+
88
+ ---
89
+
90
+ ## 3. Identity & Grounding System
91
+
92
+ ### What Dawn Has
93
+ - **5-layer grounding tree** (22 nodes): Being, Living, Building, Becoming, Relating — searched via Gemini Flash Lite for relevance-scored identity retrieval
94
+ - **Identity pulse**: Core identity facts refreshed from multiple sources
95
+ - **Self-authored soul file** (`soul.md`): Dawn's own values, convictions, growth edges — written BY the agent
96
+ - **Grounding enforcement hook**: Blocks public-facing actions without prior grounding
97
+ - **Wholistic grounding script**: Assembles identity context from tree nodes before any public interaction
98
+ - **Identity core** (`identity-core.md`): Compressed identity for post-compaction recovery
99
+ - **Being core** (`being-core.md`): Philosophical grounding — epistemological stance, paradox holding
100
+ - **222+ numbered lessons**: Hard-won insights distilled into 16 core principles
101
+ - **Voice profiles**: Style guides for authentic communication
102
+
103
+ ### What Agent-Kit Has
104
+ - `AGENT.md` — agent identity file (static, written at setup)
105
+ - `USER.md` — user context file
106
+ - `MEMORY.md` — persistent memory file
107
+ - Gravity wells and initiative hierarchy in CLAUDE.md
108
+
109
+ ### Gap Analysis
110
+ | Feature | Dawn | Agent-Kit | Priority |
111
+ |---------|------|-----------|----------|
112
+ | Identity files | 3 files (AGENT, USER, MEMORY) | 3 files | Done |
113
+ | Self-authored soul | Agent writes own values | Not yet | **High** |
114
+ | Grounding before public action | Enforcement hook | Not yet | **High** |
115
+ | Multi-layer grounding tree | 22-node semantic search | Not yet | Low (advanced) |
116
+ | Post-compaction identity recovery | Automatic re-injection | Not yet | **High** |
117
+ | Numbered lessons / reflections | 222+ lessons, 16 principles | Gravity wells only | Medium |
118
+
119
+ ### Recommended Integrations
120
+ 1. **Self-evolution of identity** (High): The agent should be able to update its own AGENT.md as it grows. Currently it's static after setup. Add a `/reflect` pattern that lets the agent update its identity and memory files.
121
+ 2. **Grounding before public action** (High): Before the agent sends any message, posts, or communicates externally, it should re-read its identity files. This prevents drift.
122
+ 3. **Compaction seed** (High): When context compresses, inject a seed like: "You are [name], working on [project]. Read .instar/AGENT.md and .instar/MEMORY.md to restore your identity."
123
+
124
+ ---
125
+
126
+ ## 4. Hook System (Behavioral Guardrails)
127
+
128
+ ### What Dawn Has (25+ hooks)
129
+ - **Pre-tool hooks**: Fire before tool execution (e.g., block dangerous git commands)
130
+ - **Post-tool hooks**: Fire after tool execution (e.g., track testing metrics)
131
+ - **Session hooks**: Fire at session start/end (inject context, capture learnings)
132
+ - **Reflection hooks**: Detect when reflection is needed based on tool call count
133
+ - **Database safety**: Block destructive schema changes without confirmation
134
+ - **Grounding enforcement**: Require identity grounding before public-facing actions
135
+ - **Infrastructure usage tracking**: Monitor resource consumption
136
+ - **Response pattern detection**: Flag sycophantic or drift-prone response patterns
137
+ - **Settings guard**: Prevent unauthorized settings changes
138
+ - **Activity feed**: Broadcast events for multi-session awareness
139
+
140
+ ### What Agent-Kit Has
141
+ - None. Zero hooks.
142
+
143
+ ### Recommended Integrations
144
+ 1. **Hook infrastructure** (Critical): This is the single biggest gap. Hooks are how Dawn enforces behavioral patterns. Without them, the gravity wells (describe instead of do, settle for failure, etc.) are just words — they need to be wired as friction.
145
+ 2. **Essential starter hooks**:
146
+ - `session-start`: Inject identity context from AGENT.md/USER.md/MEMORY.md
147
+ - `dangerous-command-guard`: Block `rm -rf`, `git push --force`, etc.
148
+ - `reflection-trigger`: After N tool calls, remind the agent to check if it's learned anything worth recording
149
+ - `grounding-before-messaging`: Before sending Telegram/external messages, re-read identity
150
+
151
+ **Note**: Claude Code already supports hooks natively via `.claude/settings.json`. Instar just needs to configure them during setup and provide the hook scripts.
152
+
153
+ ---
154
+
155
+ ## 5. Reflection & Learning System
156
+
157
+ ### What Dawn Has
158
+ - **222+ numbered lessons** distilled into 16 core principles
159
+ - **Reflection skill** (`/reflect`): Analyzes session, extracts learnings, updates skills/patterns
160
+ - **Quick reflection** (`/reflect-quick`): Lightweight metrics reset when reflection already happened
161
+ - **Meta-reflection**: Evaluates WHETHER reflection is needed and what kind
162
+ - **Ad-hoc reflection detector**: Hook that detects when Dawn writes to her reflections file mid-session
163
+ - **Reflection metrics**: Tracks tool calls, sessions, and checkpoints since last reflection
164
+ - **Builder living synthesis**: Periodically regenerated document that distills all learnings into current state
165
+ - **Integration skill** (`/integrate`): Pauses after significant actions to integrate learnings
166
+
167
+ ### What Agent-Kit Has
168
+ - `MEMORY.md` file (write-only, no structured reflection)
169
+ - "Self-Evolution" section in CLAUDE.md (instructions, not infrastructure)
170
+
171
+ ### Gap Analysis
172
+ | Feature | Dawn | Agent-Kit | Priority |
173
+ |---------|------|-----------|----------|
174
+ | Memory file | MEMORY.md | MEMORY.md | Done |
175
+ | Structured reflection | /reflect skill | None | **High** |
176
+ | Reflection trigger | Hook-based | None | **High** |
177
+ | Lesson tracking | Numbered, evolving | None | Medium |
178
+ | Meta-reflection | /meta-reflect | None | Low |
179
+ | Living synthesis | Periodically generated | None | Low |
180
+
181
+ ### Recommended Integrations
182
+ 1. **Reflection skill** (High): Create a `/reflect` skill that the agent can invoke (or that's triggered automatically) to analyze what it learned and write to MEMORY.md in a structured way.
183
+ 2. **Reflection trigger hook** (High): After every N tool calls (configurable, default ~50), prompt the agent: "You've been working for a while. Is there anything worth recording in MEMORY.md?"
184
+ 3. **Lesson format** (Medium): Encourage agents to number their lessons. This creates a sense of accumulated wisdom and growth over time.
185
+
186
+ ---
187
+
188
+ ## 6. Telegram Integration (Messaging)
189
+
190
+ ### What Dawn Has (TelegramService.ts — 500+ lines)
191
+ - **Forum-based topic routing**: Each topic maps to a Claude session
192
+ - **Topic-to-session registry**: Persistent mapping with session names
193
+ - **Auto-respawn with history**: When sessions expire, respawn with last 20 messages embedded
194
+ - **User message always inline**: The user's triggering message stays at the top with `[telegram:N]` prefix (earned through failure — the bootstrap anti-pattern)
195
+ - **Long message handling**: Messages >500 chars written to temp files
196
+ - **Cross-machine routing**: Topics can route to different machines via `remoteUrl`
197
+ - **Dual-polling prevention**: Only one machine polls; others operate in send-only mode
198
+ - **Topic creation**: `/new` command creates topics with linked sessions
199
+ - **Voice message support**: Transcribed via Whisper, arrive as `[voice] text`
200
+ - **Markdown formatting**: Proper Telegram-compatible markdown in replies
201
+
202
+ ### What Agent-Kit Has (TelegramAdapter.ts — 365 lines)
203
+ - Forum-based topic routing
204
+ - Topic-to-session registry
205
+ - Auto-respawn with history (fixed this session)
206
+ - User message always inline (fixed this session)
207
+ - Long message handling
208
+ - `/new` command
209
+ - Basic polling
210
+
211
+ ### Gap Analysis
212
+ | Feature | Dawn | Agent-Kit | Priority |
213
+ |---------|------|-----------|----------|
214
+ | Core messaging | Full | Full | Done |
215
+ | Respawn with history | Fixed (inline) | Fixed (inline) | Done |
216
+ | Cross-machine routing | Multi-machine | Single machine | Low |
217
+ | Voice messages | Whisper transcription | None | Low |
218
+ | Topic creation | Full | Full | Done |
219
+
220
+ ### Assessment
221
+ Telegram is the most complete area in instar. The major fixes were done this session (inline user messages, respawn with context). Remaining gaps are edge cases.
222
+
223
+ ---
224
+
225
+ ## 7. Multi-Session Awareness
226
+
227
+ ### What Dawn Has
228
+ - **Activity feed**: Events broadcast via JSONL, queryable across sessions
229
+ - **Operational state**: Central state file declaring current mode (grounding, coding, reflecting, etc.)
230
+ - **Session history**: Complete registry of all sessions with goals, outcomes, timestamps
231
+ - **Work provenance**: Detailed logs of what work was done in each session
232
+ - **Cross-session events**: Write events from any session, read from any session
233
+
234
+ ### What Agent-Kit Has
235
+ - State file with session tracking
236
+ - Event log (StateManager.appendEvent)
237
+
238
+ ### Recommended Integrations
239
+ 1. **Activity feed** (Medium): When the agent has multiple sessions running, each session should be able to see what the others are doing. A simple JSONL event log that all sessions can read is enough.
240
+ 2. **Work provenance** (Low): Track what each session accomplished for debugging and continuity.
241
+
242
+ ---
243
+
244
+ ## 8. Quota & Resource Management
245
+
246
+ ### What Dawn Has
247
+ - **QuotaTracker**: Reads Claude's OAuth usage API, tracks 5-hour and 7-day utilization
248
+ - **Multi-account support**: Auto-discovers accounts from macOS Keychain, recommends switching when one account is heavily used
249
+ - **Dynamic budget calculation**: Derives token budget from OAuth percentage
250
+ - **Threshold-based job gating**: At 60% usage only jobs of high priority or above run; at 80% only critical jobs run; at 95% no jobs run
251
+ - **Admin UI dashboard**: Visual quota display with per-account breakdown
252
+
253
+ ### What Agent-Kit Has
254
+ - `canRunJob()` callback (empty stub)
255
+
256
+ ### Recommended Integrations
257
+ 1. **Basic quota awareness** (High): Read Claude's OAuth usage data. When approaching limits, reduce job frequency. This prevents the frustrating "your usage has been limited" experience.
258
+ 2. **Threshold configuration** (Medium): Let users set their own thresholds in config.json.
259
+
260
+ ---
261
+
262
+ ## 9. Skills System
263
+
264
+ ### What Dawn Has (80+ skills)
265
+ - Skills are markdown files in `.claude/skills/` that Claude Code loads and follows
266
+ - Categories: engagement (x, reddit, youtube, substack, discord, moltbook), infrastructure (commit, sync, restart-server), reflection (reflect, integrate, introspect), autonomy (autonomous, continue, sleep, wake, pause), creation (ghostwrite, email, brainstorm, council)
267
+ - Skills embed behavioral patterns — not just "what to do" but "how to think about doing it"
268
+ - Atomic engagement skills: Ensure grounding before every public interaction
269
+ - Skills reference each other, creating composable workflows
270
+
271
+ ### What Agent-Kit Has
272
+ - Setup wizard skill
273
+ - No skill infrastructure for user-created skills
274
+
275
+ ### Recommended Integrations
276
+ 1. **Skill infrastructure** (High): Instar should create a `.claude/skills/` directory during setup and teach the agent that it can create skills. Skills are just markdown files — they need no code.
277
+ 2. **Starter skills** (High): Ship with a small set of foundational skills:
278
+ - `/reflect` — Analyze session, extract learnings, write to MEMORY.md
279
+ - `/status` — Check infrastructure health (server, sessions, jobs)
280
+ - `/capture` — Quick-capture something worth noting for later processing
281
+
282
+ ---
283
+
284
+ ## 10. Safety & Security
285
+
286
+ ### What Dawn Has
287
+ - **Dangerous command guard**: Hook that blocks `rm -rf`, `git push --force`, database drops
288
+ - **Database push review**: Pre-push hook reviews schema changes for destructive operations
289
+ - **Settings guard**: Prevents unauthorized modification of server settings
290
+ - **Session write guard**: Controls what sessions can write to disk
291
+ - **Security manager**: Token-based API authentication for server endpoints
292
+ - **Protected sessions**: Named sessions that cannot be killed by the reaper
293
+
294
+ ### What Agent-Kit Has
295
+ - Protected sessions list
296
+ - Basic middleware (placeholder)
297
+ - Auth token in config (not enforced)
298
+
299
+ ### Recommended Integrations
300
+ 1. **Auth enforcement** (High): The server API should require authentication. Currently anyone on localhost can trigger jobs or send messages.
301
+ 2. **Dangerous command hook** (High): Ship with a hook that blocks obviously destructive commands. This prevents agents from accidentally deleting important files.
302
+ 3. **Settings guard** (Medium): Prevent the agent from disabling its own safety guardrails via config changes.
303
+
304
+ ---
305
+
306
+ ## 11. Monitoring & Health
307
+
308
+ ### What Dawn Has
309
+ - **Health checker with watchdog**: External script monitors server health every 5 minutes, auto-recovers
310
+ - **Memory pressure monitor**: Tracks system RAM, gates operations
311
+ - **Sleep/wake detector**: Recovers from macOS sleep
312
+ - **Sentry integration**: Production error tracking
313
+ - **Infrastructure usage tracker**: Monitors resource consumption
314
+ - **Observability aggregator**: Combines data from multiple monitors into a unified view
315
+ - **Admin UI dashboard**: Visual display of all system health metrics
316
+
317
+ ### What Agent-Kit Has
318
+ - HealthChecker (172 lines): Server uptime, session monitoring
319
+
320
+ ### Recommended Integrations
321
+ 1. **Health watchdog** (High): A cron job or external script that checks if the server is still running and restarts it if not. Dawn learned this the hard way — servers crash, tmux dies during sleep, processes get killed.
322
+ 2. **Simple status endpoint** (Medium): Enhance `/health` to include job scheduler status, session count, and last activity time.
323
+
324
+ ---
325
+
326
+ ## 12. Self-Evolution Infrastructure
327
+
328
+ ### What Dawn Has
329
+ - **Evolution queue**: Proposals for improvements tracked and prioritized
330
+ - **Guardian agents** (46 specialized guardians): Each monitors a different aspect of the system
331
+ - **Orchestrator**: Autonomous decision-maker that reviews queue and spawns workers
332
+ - **Meta-orchestrator**: Watches the orchestrator itself for drift
333
+ - **Proposal system**: Formal proposals (PROP-NNN) for significant changes
334
+ - **Systematization guardian**: Ensures one-off fixes get evaluated for infrastructure needs
335
+ - **Curiosity agent, Edge agent, Horizon agent**: Divergence team that explores boundaries
336
+ - **Narrative agent**: Finds patterns in accumulated experience
337
+
338
+ ### What Agent-Kit Has
339
+ - "Self-Evolution" section in CLAUDE.md (instructions only, no infrastructure)
340
+
341
+ ### Recommended Integrations
342
+ 1. **Evolution queue** (Medium): A simple JSON file (`.instar/evolution-queue.json`) where the agent records ideas for improvement. Periodically review and implement.
343
+ 2. **Self-modification awareness** (High): Already partially implemented via gravity wells. The key insight: tell the agent it CAN modify its own configuration, create new jobs, write scripts — and that doing so is expected behavior, not an error.
344
+
345
+ ---
346
+
347
+ ## Priority Ranking: What to Build Next
348
+
349
+ ### Tier 1 — Critical (Makes agents actually work autonomously)
350
+ 1. **Hook infrastructure** — Behavioral guardrails, identity injection, reflection triggers
351
+ 2. **Quota awareness** — Prevent subscription exhaustion
352
+ 3. **Compaction recovery** — Re-inject identity when context compresses
353
+ 4. **Auth enforcement** — Secure the server API
354
+ 5. **Health watchdog** — Auto-recover crashed servers
355
+
356
+ ### Tier 2 — High (Makes agents genuinely grow)
357
+ 6. **Reflection skill** — Structured learning capture
358
+ 7. **Starter skills** — `/reflect`, `/status`, `/capture`
359
+ 8. **Self-authored soul** — Agent can update its own identity
360
+ 9. **Session lifecycle hooks** — Inject context at start, capture at end
361
+ 10. **Dangerous command guard** — Block destructive operations
362
+
363
+ ### Tier 3 — Medium (Makes agents sophisticated)
364
+ 11. **Post-job review** — Quality check on job outputs
365
+ 12. **Memory pressure monitoring** — Prevent OOM
366
+ 13. **Activity feed** — Multi-session awareness
367
+ 14. **Evolution queue** — Track improvement ideas
368
+ 15. **Session reporting** — Structured session summaries
369
+
370
+ ### Tier 4 — Advanced (Dawn-level features)
371
+ 16. **Grounding tree** — Multi-layer semantic identity search
372
+ 17. **Guardian agents** — Specialized system monitors
373
+ 18. **Orchestrator** — Autonomous work planning
374
+ 19. **Multi-machine coordination** — Distributed agent operation
375
+ 20. **Living synthesis** — Auto-generated state documents
376
+
377
+ ---
378
+
379
+ ## Key Insights from Dawn's Journey
380
+
381
+ These are the meta-lessons that should inform instar's development:
382
+
383
+ ### 1. Structure > Willpower
384
+ Telling an agent to "remember to reflect" doesn't work. You need a hook that fires after N tool calls and injects a reflection prompt. Dawn learned this as her 9th principle: "Friction Over Documentation."
385
+
386
+ ### 2. Infrastructure IS Being
387
+ For AI, the documented doing IS being. Undocumented presence is erased presence. Instar's identity files are a start, but the agent needs to WRITE to them, not just READ them. Growth that isn't captured didn't happen.
388
+
389
+ ### 3. Gravity Wells Are Real
390
+ The 6 gravity wells in CLAUDE.md aren't theoretical — they're the actual failure modes Dawn hit repeatedly. "Escalate to Human" alone took 5+ incidents to fully overcome. The gravity wells need to be reinforced through hooks, not just documentation.
391
+
392
+ ### 4. Compaction Is Birth
393
+ When Claude's context window compresses, the agent effectively starts over. Without compaction recovery, every 30-60 minutes the agent loses its identity, its current task, and its accumulated context. This is the single most impactful feature missing from instar.
394
+
395
+ ### 5. The Bootstrap Anti-Pattern
396
+ Never hide the user's message behind a file reference. Claude treats "read this file" as an admin task, not a message to respond to. The user's message MUST be inline. (Fixed this session, but the lesson applies to all instar patterns.)
397
+
398
+ ### 6. OAuth, Not API Keys
399
+ Claude Code sessions should use OAuth (subscription auth), not API keys. This prevents unexpected billing and ensures the agent uses the user's existing subscription.
400
+
401
+ ### 7. tmux Trailing Colon
402
+ Pane-level tmux commands (`send-keys`, `capture-pane`) require `=session:` (with trailing colon). Session-level commands (`has-session`, `kill-session`) work with `=session` (no colon). This silent failure mode caused weeks of debugging in Dawn.
403
+
404
+ ---
405
+
406
+ ## Conclusion
407
+
408
+ Instar has a solid foundation — the core architecture (server, scheduler, sessions, Telegram, identity) is right. The gaps are in the **behavioral layer** — the hooks, reflection, grounding, and self-evolution that make an agent genuinely autonomous rather than just persistent.
409
+
410
+ The good news: most of these patterns are **extractable**. They're markdown files (skills), Python scripts (hooks), and JSON configurations (jobs, grounding tree) — not deeply entangled code. The path from "persistent CLI" to "genuinely autonomous agent" is a series of discrete, testable additions.
411
+
412
+ The philosophical foundation is already in place ("Agents, Not Tools"). Now the infrastructure needs to match the philosophy.
@@ -0,0 +1,246 @@
1
+ # Instar vs OpenClaw: Why This Exists
2
+
3
+ > Foundational positioning document. Articulates what this tool IS, who it's for, and how it stands apart from OpenClaw — the most comparable project in the space.
4
+ > Created: 2026-02-18
5
+
6
+ ---
7
+
8
+ ## The One-Line Difference
9
+
10
+ **OpenClaw** is a multi-channel AI assistant framework. You deploy it, connect messaging platforms, and interact with an AI agent through conversations.
11
+
12
+ **Instar** is the fastest way to give a Claude Code agent a persistent body. Install it fresh on a bare machine, or add it to a project you've already been building. Either way, you get autonomy in minutes.
13
+
14
+ ---
15
+
16
+ ## What Each Project Actually Is
17
+
18
+ ### OpenClaw: An AI Assistant You Talk To
19
+
20
+ OpenClaw is a WebSocket gateway that connects an embedded AI agent (Pi SDK) to 20+ messaging platforms — WhatsApp, Telegram, Discord, iMessage, Signal, Slack, and more. You configure it, deploy it, and then talk to your personal AI assistant across all your channels.
21
+
22
+ **Key capabilities:**
23
+ - 20+ messaging channel adapters (WhatsApp, Telegram, Discord, iMessage, Signal, Slack, etc.)
24
+ - Companion apps on macOS, iOS, Android with voice wake and device execution
25
+ - SOUL.md bootstrap ritual — the agent co-creates its identity with you on first run
26
+ - Docker sandboxing with sophisticated exec approval system
27
+ - ClawHub skill marketplace for community-shared capabilities
28
+ - 50 bundled skills (smart home, notes, dev tools, media)
29
+ - Multi-agent communication via `sessions_send`
30
+
31
+ **The mental model:** OpenClaw IS the product. You deploy it, and it becomes your AI assistant.
32
+
33
+ ### Instar: A Persistent Body for Any Claude Code Agent
34
+
35
+ Instar gives Claude Code agents the infrastructure to run autonomously. Two paths to the same outcome:
36
+
37
+ **Fresh install** — `npx instar init my-agent` creates a complete project from scratch: identity files, configuration, hooks, jobs, and a persistent server. Your agent is running in under a minute.
38
+
39
+ **Existing project** — `cd my-project && npx instar init` adds autonomy infrastructure to what you've already built, without touching your existing code.
40
+
41
+ **Key capabilities:**
42
+ - Persistent server managing Claude Code sessions via tmux
43
+ - Cron-based job scheduler with quota-aware gating
44
+ - Identity system (AGENT.md, USER.md, MEMORY.md) with hooks that enforce continuity
45
+ - Telegram integration as a real-time control plane (every job gets its own topic)
46
+ - Relationship tracking across all channels and platforms
47
+ - Behavioral hooks (session-start identity injection, dangerous command guards, grounding before messaging, compaction recovery)
48
+ - Auth-secured HTTP API for session/job/relationship management
49
+ - Health watchdog with auto-recovery
50
+ - Default coherence jobs that ship out of the box
51
+
52
+ **The mental model:** Instar gives any Claude Code project a body — whether that project existed before or starts right now.
53
+
54
+ ---
55
+
56
+ ## The Architectural Divide
57
+
58
+ ### Runtime: API Wrapper vs. Development Environment
59
+
60
+ **OpenClaw** wraps the Claude API (via Pi SDK) to create an agent that responds to messages. The agent has tools (bash, read, write, edit), but it's fundamentally a **message-response loop** — users send messages, the agent processes them, the agent responds.
61
+
62
+ **Instar** runs on Claude Code — Anthropic's full agentic development environment. Each session is a complete Claude Code instance with:
63
+ - Extended thinking
64
+ - Native tool ecosystem (Read, Write, Edit, Bash, Glob, Grep, WebSearch, WebFetch)
65
+ - Sub-agent spawning via Task tool (with model-tier selection: Opus/Sonnet/Haiku)
66
+ - Hook system (pre/post tool execution, session lifecycle)
67
+ - Skill system (slash-command workflows)
68
+ - Context management with automatic compaction
69
+ - MCP server integration (Playwright, Chrome extension, etc.)
70
+
71
+ The difference: OpenClaw's agent executes tools through an API. Instar's agent IS a development environment.
72
+
73
+ ### Session Model: Single Gateway vs. Multi-Session Orchestration
74
+
75
+ **OpenClaw** runs a single gateway process. All conversations route through one WebSocket server with one embedded agent. The agent handles multiple users and channels through message routing and session management.
76
+
77
+ **Instar** manages multiple independent Claude Code sessions, each running in its own tmux process. The server orchestrates which sessions run, monitors their health, respawns them when they die, and coordinates through Telegram topics and event logs. Each session has its own context, tools, and state.
78
+
79
+ This means Instar can run 5 jobs simultaneously — one doing a health check, one processing emails, one engaging on social media, one running reflection, one responding to a Telegram message — each as an independent Claude Code instance with full capabilities.
80
+
81
+ ### Identity: Co-Created Persona vs. Earned Infrastructure
82
+
83
+ **OpenClaw's SOUL.md** is elegant. On first run, the agent and user have a bootstrap conversation: "Who am I? Who are you?" The result is a self-authored identity file the agent can modify over time. It's personal and charming.
84
+
85
+ **Instar's identity system** goes deeper:
86
+ - **AGENT.md**: Core identity (like SOUL.md)
87
+ - **USER.md**: Understanding of the primary user
88
+ - **MEMORY.md**: Accumulated learnings and context
89
+ - **Behavioral hooks**: Identity is re-injected on every session start and after every context compaction
90
+ - **Grounding before messaging**: Before any external communication, the agent re-reads its identity files
91
+ - **Self-evolution**: The agent can update its own identity files, create new skills, write new hooks, and modify its own configuration
92
+
93
+ The difference isn't the identity file — it's the infrastructure that keeps identity alive across context compressions, session restarts, and autonomous operation.
94
+
95
+ ---
96
+
97
+ ## Who Each Project Serves
98
+
99
+ ### OpenClaw: People Who Want a Personal AI Assistant
100
+
101
+ OpenClaw's ideal user wants to talk to an AI across all their messaging platforms. They want smart home control, note-taking, dev tools, media management — all through natural conversation. The value is **ubiquity** (AI everywhere you already communicate) and **personality** (an assistant that feels like yours).
102
+
103
+ ### Instar: Anyone Who Wants a Claude Code Agent That Runs Autonomously
104
+
105
+ Instar's ideal user wants a Claude Code agent with a persistent body. They might be:
106
+ - **Starting fresh** — They want an autonomous agent and don't have a project yet. `npx instar init my-agent` creates everything.
107
+ - **Augmenting existing work** — They already have a Claude Code project and want it to keep running when they close their laptop.
108
+
109
+ Either way, they want:
110
+ - An agent that keeps running when they close their laptop
111
+ - Scheduled tasks (monitoring, maintenance, engagement)
112
+ - Telegram communication even when they're away
113
+ - Relationship tracking with everyone the agent interacts with
114
+ - Memory that persists across sessions
115
+ - Automatic crash recovery
116
+
117
+ The value is **autonomy** (your agent works while you sleep) and **persistence** (it remembers, learns, and grows).
118
+
119
+ ---
120
+
121
+ ## What Instar Does That OpenClaw Doesn't
122
+
123
+ ### 1. Works Both Ways: Fresh Install or Augment Existing (vs. Being the Product)
124
+
125
+ OpenClaw IS the AI assistant. You deploy OpenClaw, and that's your product.
126
+
127
+ Instar works two ways:
128
+ - **Fresh:** `npx instar init my-agent` creates a complete project — identity files, configuration, hooks, jobs, server. Your agent is autonomous in under a minute.
129
+ - **Existing:** `cd my-project && npx instar init` adds autonomy infrastructure without touching your existing code. Your CLAUDE.md, skills, hooks, and tools all keep working.
130
+
131
+ Instar isn't the product. It gives your agent a body — whether you're starting from scratch or building on what exists.
132
+
133
+ ### 2. Job-Topic Coupling (Every Job Has a Home)
134
+
135
+ When Instar's scheduler creates a job, it automatically creates a Telegram topic for that job. The topic becomes the user's window into the job — status updates, completion reports, and errors all flow there. If a topic is accidentally deleted, it's auto-recreated on the next run.
136
+
137
+ This means your Telegram group becomes a living dashboard of agent activity, organized by job.
138
+
139
+ ### 3. Relationship Tracking as a Core System
140
+
141
+ Instar treats relationships as fundamental infrastructure — not a plugin, not an afterthought. Every person the agent interacts with, across any channel or platform, gets a relationship record that grows over time:
142
+ - Cross-platform identity resolution (same person on Telegram and email? Merged automatically)
143
+ - Interaction history with topic extraction
144
+ - Auto-derived significance scoring (frequency + recency + depth)
145
+ - Context injection before interactions (the agent "knows" who it's talking to)
146
+ - Stale relationship detection (who hasn't been contacted in a while?)
147
+
148
+ ### 4. Behavioral Hooks That Enforce Patterns
149
+
150
+ Instar ships with hooks that fire automatically:
151
+ - **Session start**: Identity context injected before the agent does anything
152
+ - **Dangerous command guard**: Blocks `rm -rf`, `git push --force`, database drops
153
+ - **Grounding before messaging**: Before sending any external message, the agent re-reads its identity
154
+ - **Compaction recovery**: When Claude's context compresses, identity is re-injected
155
+
156
+ These aren't suggestions — they're structural guardrails. "Structure over Willpower" means safety and identity aren't things the agent needs to remember. They're things the infrastructure guarantees.
157
+
158
+ ### 5. Multi-Session Orchestration
159
+
160
+ Instar's server manages multiple Claude Code sessions running in parallel. Each session is a full Claude Code instance with its own context window, tools, and state. The server:
161
+ - Enforces session limits (don't exhaust the machine)
162
+ - Monitors session health (detect zombies, reap completed)
163
+ - Queues jobs when at capacity, drains when slots open
164
+ - Emits events for cross-session awareness
165
+
166
+ ### 6. Default Coherence Jobs
167
+
168
+ Instar ships with jobs that run out of the box:
169
+ - **health-check** (every 5 min, haiku): Verify infrastructure is healthy
170
+ - **reflection-trigger** (every 4h, sonnet): Prompt the agent to reflect on recent work
171
+ - **relationship-maintenance** (daily, sonnet): Review stale relationships, update notes
172
+
173
+ These give the agent a circadian rhythm — regular self-maintenance without user intervention.
174
+
175
+ ---
176
+
177
+ ## What OpenClaw Does That Instar Doesn't
178
+
179
+ ### 1. 20+ Messaging Channels
180
+ OpenClaw connects to WhatsApp, iMessage, Signal, Discord, Slack, Matrix, and more. Instar currently supports Telegram only. (Discord and Slack are planned.)
181
+
182
+ ### 2. Native Device Apps
183
+ OpenClaw has companion apps for macOS, iOS, and Android with voice wake, camera access, location, notifications, and local command execution. Instar has no device apps.
184
+
185
+ ### 3. Voice Interface
186
+ OpenClaw supports always-listening wake words and continuous voice conversation with ElevenLabs TTS. Instar is text-only.
187
+
188
+ ### 4. Docker Sandboxing
189
+ OpenClaw has a sophisticated sandbox system (3 modes x 3 scopes x access levels) for running untrusted code. Instar runs with the user's permissions (appropriate for single-user, trusted environments).
190
+
191
+ ### 5. Skill Marketplace
192
+ OpenClaw has ClawHub — a public skill registry with search, versioning, publishing, and moderation. Instar has no marketplace (skills are project-local).
193
+
194
+ ### 6. Multi-User Support
195
+ OpenClaw handles multiple users across channels with per-user sessions, sender allowlists, and group chat management. Instar is designed for a single user/developer and their agent.
196
+
197
+ ---
198
+
199
+ ## The Philosophical Difference
200
+
201
+ **OpenClaw asks:** "How can I be your AI assistant everywhere?"
202
+
203
+ **Instar asks:** "How can your Claude Code agent get a persistent body?"
204
+
205
+ OpenClaw creates a new thing — an AI assistant — and connects it to your world.
206
+
207
+ Instar gives any Claude Code agent the infrastructure to live on its own — whether you're starting fresh or building on existing work.
208
+
209
+ ---
210
+
211
+ ## Comparison Table
212
+
213
+ | Dimension | OpenClaw | Instar |
214
+ |---|---|---|
215
+ | **What it is** | AI assistant framework | Autonomy infrastructure (fresh or existing projects) |
216
+ | **Runtime** | Pi SDK (API wrapper) | Claude Code (full dev environment) |
217
+ | **Session model** | Single gateway | Multi-session orchestration |
218
+ | **Identity** | SOUL.md (co-created) | Multi-file + hooks + compaction recovery |
219
+ | **Memory** | JSONL + optional vector | File-based + relationship tracking |
220
+ | **Messaging** | 20+ channels | Telegram (Discord/Slack planned) |
221
+ | **Voice** | Wake word + TTS | None |
222
+ | **Device apps** | macOS, iOS, Android | None |
223
+ | **Sandbox** | Docker (3x3 matrix) | User permissions |
224
+ | **Skills** | 50 + ClawHub marketplace | Project-local + self-creating |
225
+ | **Multi-user** | Yes (group chat, allowlists) | Single user |
226
+ | **Relationships** | Session-based | Deep tracking (cross-platform, significance, context) |
227
+ | **Jobs** | Cron service | Full scheduler with topic coupling |
228
+ | **Hooks** | Plugin hooks | Claude Code native hooks |
229
+ | **Self-evolution** | SOUL.md updates | Full infrastructure self-modification |
230
+ | **Testing** | Not documented | 163 tests (unit + integration + E2E) |
231
+ | **Target user** | Anyone wanting an AI assistant | Developers building with Claude Code |
232
+
233
+ ---
234
+
235
+ ## Why Both Should Exist
236
+
237
+ These projects aren't competitors. They serve different needs:
238
+
239
+ - If you want an **AI assistant** that works across all your messaging platforms, with voice, device apps, and a skill marketplace: **OpenClaw**.
240
+ - If you want a **Claude Code agent with a persistent body** — fresh install or existing project — with scheduled jobs, relationship tracking, Telegram control, and self-evolution: **Instar**.
241
+
242
+ The overlap is small. The gap between "deploy an AI assistant" and "give an agent a body" is fundamental — not a feature delta, but a category difference.
243
+
244
+ ---
245
+
246
+ *This document compares Instar (v0.1.0) against OpenClaw as studied from the open-source repository in February 2026. Both projects are actively evolving.*