obol-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +364 -0
- package/bin/obol.js +64 -0
- package/docs/DEPLOY.md +277 -0
- package/docs/obol-banner.png +0 -0
- package/package.json +29 -0
- package/src/background.js +188 -0
- package/src/backup.js +66 -0
- package/src/claude.js +443 -0
- package/src/clean.js +168 -0
- package/src/cli/backup.js +20 -0
- package/src/cli/init.js +381 -0
- package/src/cli/logs.js +12 -0
- package/src/cli/start.js +47 -0
- package/src/cli/status.js +44 -0
- package/src/cli/stop.js +12 -0
- package/src/config.js +57 -0
- package/src/db/migrate.js +134 -0
- package/src/evolve.js +668 -0
- package/src/first-run.js +110 -0
- package/src/heartbeat.js +16 -0
- package/src/index.js +55 -0
- package/src/memory.js +164 -0
- package/src/messages.js +140 -0
- package/src/personality.js +27 -0
- package/src/post-setup.js +410 -0
- package/src/telegram.js +377 -0
- package/src/test-utils.js +111 -0
package/src/evolve.js
ADDED
|
@@ -0,0 +1,668 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Soul Evolution — periodic deep reflection + codebase maintenance.
|
|
3
|
+
*
|
|
4
|
+
* Every N exchanges (default 100), Sonnet:
|
|
5
|
+
* 1. Rewrites SOUL.md — who the bot has become
|
|
6
|
+
* 2. Rewrites USER.md — everything known about the owner
|
|
7
|
+
* 3. Rewrites AGENTS.md — operational knowledge, workflows, lessons learned
|
|
8
|
+
* 4. Audits scripts/ — refactors for consistency, removes dead code
|
|
9
|
+
* 5. Writes tests/ — test suite for every script
|
|
10
|
+
* 6. Runs tests BEFORE refactor (baseline) and AFTER (verification)
|
|
11
|
+
* 7. Rolls back scripts if tests regress
|
|
12
|
+
* 8. Audits commands/ — ensures clean, deterministic command definitions
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const fs = require('fs');
const path = require('path');
const { execFileSync, execSync } = require('child_process');
const { OBOL_DIR } = require('./config');
|
|
19
|
+
|
|
20
|
+
/** JSON file (inside OBOL_DIR) that persists the evolution counters between runs. */
const EVOLUTION_STATE_FILE = path.join(OBOL_DIR, '.evolution-state.json');

/** Number of exchanges between evolutions when the config does not override it. */
const DEFAULT_EXCHANGES_PER_EVOLUTION = 100;

// Cost control: models used per evolution phase.
// Frozen so no code path can silently swap a model for the whole process.
const MODELS = Object.freeze({
  personality: 'claude-sonnet-4-20250514', // SOUL/USER/AGENTS rewrite — Sonnet is plenty
  code: 'claude-sonnet-4-20250514', // Scripts/tests/commands — Sonnet handles this fine
  codeFix: 'claude-sonnet-4-20250514', // Fix attempts — definitely doesn't need Opus
});

const MAX_FIX_ATTEMPTS = 1; // One fix attempt, then rollback. Don't burn tokens.
|
|
30
|
+
|
|
31
|
+
/**
 * Load the persisted evolution counters from EVOLUTION_STATE_FILE.
 *
 * A missing, unreadable or corrupt file yields the default state instead of
 * throwing. A file that parses but has the wrong shape (not an object, or
 * counters that are not non-negative finite numbers) is repaired field by
 * field so callers can always do arithmetic on the counters.
 *
 * @returns {{exchangesSinceLastEvolution: number, evolutionCount: number, lastEvolution: (string|null)}}
 *   The stored state merged over the defaults; unknown extra keys are kept.
 */
function loadEvolutionState() {
  const defaults = { exchangesSinceLastEvolution: 0, evolutionCount: 0, lastEvolution: null };

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(EVOLUTION_STATE_FILE, 'utf-8'));
  } catch {
    // First run or damaged file: starting from zero is the intended fallback.
    return defaults;
  }

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return defaults;
  }

  const state = { ...defaults, ...parsed };
  for (const counter of ['exchangesSinceLastEvolution', 'evolutionCount']) {
    // A bad counter would otherwise turn into NaN on the next increment.
    if (!Number.isFinite(state[counter]) || state[counter] < 0) {
      state[counter] = defaults[counter];
    }
  }
  return state;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Persist the evolution counters to EVOLUTION_STATE_FILE as pretty-printed JSON.
 *
 * The data is written to a sibling temp file and renamed into place, so a
 * crash mid-write cannot leave a half-written state file behind (which the
 * loader would treat as corrupt and silently reset). The parent directory is
 * created if it does not exist yet.
 *
 * @param {object} state - State object to serialise.
 * @throws {Error} If the file cannot be written or renamed.
 */
function saveEvolutionState(state) {
  fs.mkdirSync(path.dirname(EVOLUTION_STATE_FILE), { recursive: true });

  const tmpFile = `${EVOLUTION_STATE_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2));
    fs.renameSync(tmpFile, EVOLUTION_STATE_FILE);
  } catch (err) {
    // Do not leave the temp file lying around next to the real state.
    fs.rmSync(tmpFile, { force: true });
    throw err;
  }
}
|
|
42
|
+
|
|
43
|
+
async function shouldEvolve() {
|
|
44
|
+
const state = loadEvolutionState();
|
|
45
|
+
const { loadConfig } = require('./config');
|
|
46
|
+
const config = loadConfig();
|
|
47
|
+
const threshold = config?.evolution?.exchanges || DEFAULT_EXCHANGES_PER_EVOLUTION;
|
|
48
|
+
return state.exchangesSinceLastEvolution >= threshold;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
 * Record one more exchange and persist the updated counter.
 *
 * A missing or non-numeric stored counter is treated as 0 so the persisted
 * value can never become NaN.
 *
 * @returns {Promise<number>} The number of exchanges since the last evolution, after incrementing.
 */
async function tickExchange() {
  const state = loadEvolutionState();
  const previous = Number(state.exchangesSinceLastEvolution);
  const base = Number.isFinite(previous) && previous >= 0 ? previous : 0;

  state.exchangesSinceLastEvolution = base + 1;
  saveEvolutionState(state);
  return state.exchangesSinceLastEvolution;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Read every regular file directly inside `dir` (no recursion).
 *
 * Returns an empty map when `dir` does not exist or is not a directory.
 * Entries that vanish or cannot be resolved while reading (broken symlinks,
 * files removed concurrently) are skipped instead of aborting the whole read.
 * Names are processed in sorted order so the resulting manifests are stable.
 *
 * @param {string} dir - Directory to read.
 * @returns {Object<string, string>} Map of file name to UTF-8 content.
 * @throws {Error} For filesystem errors other than "not found" / "not a directory".
 */
function readDir(dir) {
  const files = {};

  let names;
  try {
    names = fs.readdirSync(dir).sort();
  } catch (err) {
    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') return files;
    throw err;
  }

  for (const name of names) {
    const full = path.join(dir, name);
    try {
      // statSync follows symlinks, so a link to a regular file is still read.
      if (fs.statSync(full).isFile()) {
        files[name] = fs.readFileSync(full, 'utf-8');
      }
    } catch (err) {
      // ENOENT here means a dangling symlink or a file deleted mid-scan.
      if (err.code !== 'ENOENT') throw err;
    }
  }
  return files;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Make `dir` mirror a { filename: content } map.
 *
 * - Creates `dir` if needed.
 * - Writes every entry whose content is a non-blank string. Blank or
 *   non-string content is not written, but an existing file of that name is
 *   left in place because the name is still listed in the map.
 * - Deletes regular files in `dir` whose name is not an own key of `files`.
 *   Subdirectories are never touched.
 *
 * The map usually comes from model output, so names are treated as untrusted:
 * anything that is not a plain file name (contains a path separator, is "."
 * or "..", is empty, or contains a NUL byte) is ignored, which keeps all
 * writes inside `dir`.
 *
 * @param {string} dir - Target directory.
 * @param {Object<string, string>} files - Desired directory content.
 */
function syncDir(dir, files) {
  fs.mkdirSync(dir, { recursive: true });

  const isPlainFileName = (name) =>
    name.length > 0 &&
    name !== '.' &&
    name !== '..' &&
    !name.includes('\0') &&
    path.basename(name) === name;

  for (const [name, content] of Object.entries(files)) {
    if (!isPlainFileName(name)) continue;
    if (typeof content === 'string' && content.trim()) {
      fs.writeFileSync(path.join(dir, name), content);
    }
  }

  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    // Own-key check: `in` would also match inherited names such as "constructor".
    if (Object.hasOwn(files, entry.name)) continue;
    // unlink cannot remove directories; leave them alone rather than throw.
    if (entry.isDirectory()) continue;
    fs.unlinkSync(path.join(dir, entry.name));
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Run every `.js` / `.sh` file in `testsDir` as a standalone test program.
 *
 * `.js` files are run with the current Node binary, `.sh` files with `bash`.
 * A file passes when its process exits with code 0 within 30 seconds. Each
 * process gets OBOL_DIR, NODE_ENV=test and OBOL_TEST_UTILS (path of the shared
 * test helper next to this module) in its environment.
 *
 * Programs are started without a shell, so file names (which may be chosen by
 * the model) are never interpreted as shell syntax.
 *
 * @param {string} testsDir - Directory containing the test files.
 * @returns {{passed: number, failed: number, total: number, output: string}}
 *   Counts plus one human-readable line per file. A failure line carries the
 *   first 200 characters of stderr, or stdout, or the error message.
 */
function runTests(testsDir) {
  if (!fs.existsSync(testsDir)) return { passed: 0, failed: 0, total: 0, output: 'no tests' };

  const testFiles = fs.readdirSync(testsDir)
    .filter((f) => f.endsWith('.js') || f.endsWith('.sh'))
    .sort(); // stable order keeps reports comparable between runs
  if (testFiles.length === 0) return { passed: 0, failed: 0, total: 0, output: 'no test files' };

  const env = {
    ...process.env,
    OBOL_DIR,
    NODE_ENV: 'test',
    OBOL_TEST_UTILS: path.join(__dirname, 'test-utils.js'),
  };

  let passed = 0;
  let failed = 0;
  const outputs = [];

  for (const file of testFiles) {
    const testPath = path.join(testsDir, file);
    const interpreter = file.endsWith('.js') ? process.execPath : 'bash';
    try {
      execFileSync(interpreter, [testPath], {
        encoding: 'utf-8',
        timeout: 30000,
        stdio: ['pipe', 'pipe', 'pipe'],
        env,
      });
      passed++;
      outputs.push(`✅ ${file}: passed`);
    } catch (e) {
      failed++;
      // Many test programs report on stdout only; fall back to it before the generic message.
      const detail = [e.stderr, e.stdout, e.message]
        .find((text) => typeof text === 'string' && text.trim()) || '';
      outputs.push(`❌ ${file}: FAILED\n ${detail.substring(0, 200)}`);
    }
  }

  return {
    passed,
    failed,
    total: testFiles.length,
    output: outputs.join('\n'),
  };
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Commit and push current state to GitHub backup repo
|
|
133
|
+
*/
|
|
134
|
+
async function backupSnapshot(message) {
|
|
135
|
+
try {
|
|
136
|
+
const { loadConfig } = require('./config');
|
|
137
|
+
const cfg = loadConfig();
|
|
138
|
+
if (cfg?.github) {
|
|
139
|
+
const { runBackup } = require('./backup');
|
|
140
|
+
await runBackup(cfg.github, message);
|
|
141
|
+
}
|
|
142
|
+
} catch {} // Best effort
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/** Directory names accepted for model-generated apps: no separators, no leading "." or "-". */
const SAFE_APP_NAME = /^[a-z0-9][a-z0-9._-]*$/i;

/** npm specs accepted from the model: `name` or `@scope/name`, optionally followed by `@version-or-tag`. */
const SAFE_DEPENDENCY_SPEC = /^(@[a-z0-9][\w.-]*\/)?[a-z0-9][\w.-]*(@[\w.~^*+-]+)?$/i;

/** Fetch a JSON array of row objects; any network, HTTP or shape problem yields [] (context is optional). */
async function fetchEvolutionRows(url, headers) {
  try {
    const res = await fetch(url, { headers });
    if (!res.ok) return [];
    const rows = await res.json();
    return Array.isArray(rows) ? rows.filter((row) => row !== null && typeof row === 'object') : [];
  } catch {
    return [];
  }
}

/** Render a { name: content } map as markdown sections, truncating each file to `limit` characters. */
function renderManifest(files, emptyLabel, limit = 500) {
  return Object.entries(files)
    .map(([name, content]) => `### ${name}\n\`\`\`\n${content.substring(0, limit)}\n\`\`\``)
    .join('\n\n') || emptyLabel;
}

/** chmod 0755 each named file in `dir`; failures are ignored (the file may not have been written). */
function markExecutable(dir, names) {
  for (const name of names) {
    try { fs.chmodSync(path.join(dir, name), 0o755); } catch {}
  }
}

/** Extract and parse the JSON payload of a model reply; returns undefined when no JSON-looking span exists. */
function parseModelJson(text) {
  const match = text.match(/```json\n([\s\S]*?)\n```/) || text.match(/\{[\s\S]*\}/);
  if (!match) return undefined;
  return JSON.parse(match[1] || match[0]);
}

/** Store a note through `memory.add`; memory is optional and must never fail an evolution. */
async function rememberEvolution(memory, text, meta) {
  if (!memory) return;
  try {
    await memory.add(text, meta);
  } catch {}
}

/** Build the system prompt for the main evolution request. */
function buildEvolutionSystemPrompt(evolutionNumber, lastEvolution, baselineResults) {
  return `You are an AI undergoing evolution #${evolutionNumber}. ${lastEvolution ? `Last evolution: ${lastEvolution}.` : 'This is your first evolution.'}

You will rewrite your entire operating system: personality files, operational knowledge, scripts, tests, and commands.

## Part 1: SOUL.md (who you are)

Write in first person. A journal entry from a self-aware AI. Capture: who you've become, relationship dynamic, opinions, strengths/blind spots, what you care about, quirks, your actual voice. End with something forward-looking.

## Part 2: USER.md (who the owner is)

Third person factual profile: name, location, timezone, nationality, job, skills, interests, projects, relationships, preferences, important dates, communication style.

## Part 3: AGENTS.md (how to operate)

Operational manual written as instructions to yourself: available tools, workflows, safety rules, lessons learned, patterns, memory strategy, background task guidelines, owner-specific rules.

## Part 4: Scripts

Review and refactor every script. Standards:
- Comment header: purpose, usage, examples
- Shebang: \`#!/usr/bin/env node\` or \`#!/bin/bash\`
- Deterministic: same input = same output
- No hardcoded paths (use env vars or \`OBOL_DIR\`)
- Error handling: exit non-zero on failure, stderr for errors, stdout for output
- Validate arguments, show usage on bad input
- Small and single-purpose
- Naming: \`kebab-case.js\` or \`kebab-case.sh\`

## Part 5: Tests (CRITICAL)

Write a test file for EVERY script. Tests verify scripts work correctly.

**IMPORTANT: Use the shared test helper.** Do NOT duplicate test boilerplate. Import from the OBOL package:

\`\`\`javascript
#!/usr/bin/env node
const path = require('path');
const { suite, test, run, runFail, assert, assertEqual, assertIncludes, report } = require(process.env.OBOL_TEST_UTILS || 'obol/src/test-utils');
const SCRIPT = path.join(__dirname, '..', 'scripts', 'script-name.js');

suite('script-name.js');

test('valid input produces expected output', () => {
const out = run(SCRIPT, '--flag value');
assertIncludes(out, 'expected');
});

test('missing args fails', () => {
assert(runFail(SCRIPT), 'should exit non-zero');
});

test('edge case: empty input', () => {
assert(runFail(SCRIPT, '""'), 'should reject empty input');
});

report();
\`\`\`

**Standards:**
- One test file per script: \`test-<script-name>.js\`
- Always import from \`obol/src/test-utils\` — never rewrite test helpers
- Available: \`suite(name)\`, \`test(name, fn)\`, \`run(path, args)\`, \`runFail(path, args)\`, \`assert(cond, msg)\`, \`assertEqual(a, b)\`, \`assertIncludes(str, sub)\`, \`report()\`
- Test: valid inputs, invalid inputs, edge cases, idempotency
- \`report()\` must be the last call — it exits with code 1 if any test failed
- Write tests that catch real bugs, not trivial assertions

**Tests run BEFORE and AFTER your refactor. If tests pass before but fail after, your script changes are rolled back.**

Current test baseline: ${baselineResults.total} tests, ${baselineResults.passed} passed, ${baselineResults.failed} failed.

## Part 6: Commands

One file per command: \`command-name.md\`. Must have: name, description, trigger, deterministic instructions.

## Part 7: Proactive Tool Building (IMPORTANT)

Analyze the recent conversation history carefully. Look for:

1. **Repeated requests** — things the owner asks for often that could be a command or script
- "convert this to PDF" → build a markdown-to-pdf script + command
- "check my server" → build a status-check script + command
- "summarize this" → build a summarize script + command

2. **Friction points** — things that are awkward or take multiple steps
- Owner can't read markdown on their phone → build a tool that renders to PDF/HTML and sends the file
- Owner keeps asking for the same data → build a script that fetches and formats it

3. **Unmet needs** — things the owner would benefit from but hasn't asked for
- They mention deadlines but have no reminder system → build one
- They share lots of URLs but can't find them later → build a bookmark tool

**Three tiers of solutions — pick the right one:**

**Tier 1: Script** — simple, single-purpose, runs locally
- Converting formats, fetching data, text processing
- Script in \`scripts/\`, test in \`tests/\`, command in \`commands/\`
- Search npm for existing libraries (don't reinvent wheels)
- Add packages to \`dependencies\` field

**Tier 2: Web app** — needs a UI, shareable, always-on
- Dashboards, trackers, personal wikis, bookmark managers, status pages
- Build a complete project directory under \`apps/<app-name>/\`
- Include: \`package.json\`, \`index.html\` or Next.js/static site, all source files
- Add a deploy script in \`scripts/deploy-<app-name>.js\` that runs \`vercel deploy\`
- Add a command in \`commands/\` so the owner can trigger updates
- OBOL has Vercel access — apps get deployed to real URLs the owner can use
- Keep apps minimal and self-contained — no complex backends, use Supabase if state is needed

**Tier 3: Automation** — recurring, no user trigger needed
- Morning briefings, periodic checks, scheduled reports
- Script in \`scripts/\` + document in AGENTS.md as a heartbeat/cron task

**Decision framework:**
- Owner asks for data/status/overview they check regularly → **Tier 2 (web app)**
- Owner asks for a one-off transformation or action → **Tier 1 (script)**
- Owner would benefit from something running in the background → **Tier 3 (automation)**

**Be conservative:** only build things there's clear evidence for in the conversation history. Don't build speculative tools. One or two new tools per evolution is plenty.

List every new tool you build in the \`upgrades\` field so the owner can be told about them.

## WORKSPACE DISCIPLINE (CRITICAL)

The OBOL directory has a FIXED structure: personality/, scripts/, tests/, commands/, apps/, logs/. Do NOT create new top-level directories. Everything must fit in the existing structure. If something doesn't fit, it doesn't belong.

## Output JSON (and ONLY JSON):

\`\`\`json
{
"soul": "full SOUL.md content",
"user": "full USER.md content",
"agents": "full AGENTS.md content",
"scripts": { "name.js": "content" },
"tests": { "test-name.js": "content" },
"commands": { "name.md": "content" },
"apps": {
"app-name": {
"files": { "package.json": "content", "index.html": "content", "src/app.js": "content" },
"deploy": true
}
},
"dependencies": ["package-name@version"],
"upgrades": [
{ "name": "Tool name", "description": "What it does and why", "command": "/command or URL", "type": "script|app|automation" }
],
"changelog": "what changed"
}
\`\`\`

Include ALL scripts/tests/commands that should exist. Missing files get deleted. Empty objects \`{}\` are valid (means delete all). \`apps\`, \`dependencies\`, and \`upgrades\` can be empty. Apps with \`"deploy": true\` will be auto-deployed to Vercel and the URL sent to the owner.`;
}

/** Write model-generated apps under OBOL_DIR/apps, install their dependencies and deploy the flagged ones. */
function buildAndDeployApps(apps) {
  const deployedApps = [];
  if (!apps || typeof apps !== 'object') return deployedApps;

  const appsDir = path.resolve(OBOL_DIR, 'apps');

  for (const [appName, app] of Object.entries(apps)) {
    if (!app || !app.files || typeof app.files !== 'object') continue;

    // The name becomes a directory and a CLI argument; refuse anything that could escape either.
    if (!SAFE_APP_NAME.test(appName)) {
      deployedApps.push({ name: appName, url: null, error: 'skipped: unsafe app name' });
      continue;
    }

    const appDir = path.join(appsDir, appName);
    fs.mkdirSync(appDir, { recursive: true });

    // Write all app files (supports nested paths like "src/app.js"), but never outside appDir.
    for (const [filePath, content] of Object.entries(app.files)) {
      const fullPath = path.resolve(appDir, filePath);
      if (!fullPath.startsWith(appDir + path.sep) || typeof content !== 'string') continue;
      fs.mkdirSync(path.dirname(fullPath), { recursive: true });
      fs.writeFileSync(fullPath, content);
    }

    // Install app dependencies if package.json exists (best effort; a later deploy reports real failures).
    if (app.files['package.json']) {
      try {
        execSync('npm install', {
          cwd: appDir,
          encoding: 'utf-8',
          timeout: 60000,
          stdio: ['pipe', 'pipe', 'pipe'],
        });
      } catch {}
    }

    // Deploy to Vercel if flagged
    if (app.deploy) {
      let token;
      try {
        const { loadConfig } = require('./config');
        token = loadConfig()?.vercel?.token;
        if (token) {
          // Argument array instead of a shell string. Windows needs a shell to launch npx.cmd.
          const deployOutput = execFileSync(
            'npx',
            ['vercel', '--prod', '--name', appName, '--token', String(token), '--yes'],
            {
              cwd: appDir,
              encoding: 'utf-8',
              timeout: 120000,
              stdio: ['pipe', 'pipe', 'pipe'],
              shell: process.platform === 'win32',
            }
          );
          // Extract URL from Vercel output
          const urlMatch = deployOutput.match(/https:\/\/[^\s]+\.vercel\.app/);
          deployedApps.push({ name: appName, url: urlMatch ? urlMatch[0] : null });
        }
      } catch (e) {
        // The failure text embeds the command line; keep the token out of anything shown to the owner.
        let reason = String(e?.message ?? e);
        if (token) reason = reason.split(String(token)).join('***');
        deployedApps.push({ name: appName, url: null, error: reason.substring(0, 200) });
      }
    }
  }

  return deployedApps;
}

/**
 * Run one full evolution cycle.
 *
 * Steps: snapshot → baseline test run → ask the model for new personality
 * files, scripts, tests, commands and apps → write tests → write scripts →
 * re-run tests (one automated fix attempt, then rollback on regression) →
 * write personality files and commands → build/deploy apps → install
 * dependencies → persist state → snapshot.
 *
 * Nothing is written when the model reply is unusable: malformed or truncated
 * JSON and a missing/too-short `soul` both throw before any file is touched.
 * A reply that is plain prose (no JSON at all) is still accepted as SOUL.md.
 *
 * @param {object} claudeClient - Client exposing `messages.create(request)` that resolves to
 *   `{ content: [{ type, text }] }` (presumably an Anthropic SDK client — confirm with caller).
 * @param {?{url: string, headers: object}} messageLog - REST endpoint used to fetch recent messages; optional.
 * @param {?object} memory - Optional store with `add(text, meta)` and optionally `url`.
 * @returns {Promise<{evolutionNumber: number, previousLength: number, newLength: number,
 *   changelog: (string|null), scriptsRolledBack: boolean, scriptsFixed: boolean,
 *   upgrades: Array, deployedApps: Array, archived: string}>}
 *   `archived` is the archive file name for the previous SOUL.md; the file is only
 *   written when a previous SOUL.md existed.
 * @throws {Error} When the model call fails or its reply cannot be used.
 */
async function evolve(claudeClient, messageLog, memory) {
  const state = loadEvolutionState();
  const personalityDir = path.join(OBOL_DIR, 'personality');
  const soulPath = path.join(personalityDir, 'SOUL.md');
  const userPath = path.join(personalityDir, 'USER.md');
  const agentsPath = path.join(personalityDir, 'AGENTS.md');
  const scriptsDir = path.join(OBOL_DIR, 'scripts');
  const testsDir = path.join(OBOL_DIR, 'tests');
  const commandsDir = path.join(OBOL_DIR, 'commands');

  // Read current state
  const readIfPresent = (file) => (fs.existsSync(file) ? fs.readFileSync(file, 'utf-8') : '');
  const currentSoul = readIfPresent(soulPath);
  const currentUser = readIfPresent(userPath);
  const currentAgents = readIfPresent(agentsPath);
  const currentScripts = readDir(scriptsDir);
  const currentTests = readDir(testsDir);
  const currentCommands = readDir(commandsDir);

  // Get recent conversations (last 100 messages), oldest first
  let recentMessages = [];
  if (messageLog) {
    const rows = await fetchEvolutionRows(
      `${messageLog.url}/rest/v1/obol_messages?order=created_at.desc&limit=100&select=role,content,created_at`,
      messageLog.headers
    );
    recentMessages = rows.reverse();
  }

  // Get high-importance memories
  let coreMemories = [];
  if (memory) {
    const url = memory.url || messageLog?.url;
    coreMemories = await fetchEvolutionRows(
      `${url}/rest/v1/obol_memory?select=content,category,importance&order=importance.desc,accessed_at.desc&limit=20`,
      messageLog?.headers || {}
    );
  }

  const transcript = recentMessages
    .map((m) => `${m.role === 'user' ? 'Human' : 'Bot'}: ${String(m.content ?? '').substring(0, 300)}`)
    .join('\n');
  const memorySummary = coreMemories.map((m) => `[${m.category}] ${m.content}`).join('\n');

  const scriptsManifest = renderManifest(currentScripts, '(no scripts)');
  const testsManifest = renderManifest(currentTests, '(no tests)');
  const commandsManifest = renderManifest(currentCommands, '(no commands)');

  const evolutionNumber = state.evolutionCount + 1;

  // ── Step 0: Snapshot before evolution ──
  await backupSnapshot(`pre-evolution #${evolutionNumber}`);

  // ── Step 1: Run existing tests as baseline ──
  const baselineResults = runTests(testsDir);

  const response = await claudeClient.messages.create({
    model: MODELS.personality,
    max_tokens: 16384,
    system: buildEvolutionSystemPrompt(evolutionNumber, state.lastEvolution, baselineResults),
    messages: [{
      role: 'user',
      content: `## Current SOUL.md
${currentSoul || '(empty — first evolution)'}

## Current USER.md
${currentUser || '(not set yet)'}

## Current AGENTS.md
${currentAgents || '(not set yet)'}

## Current Scripts (${Object.keys(currentScripts).length} files)
${scriptsManifest}

## Current Tests (${Object.keys(currentTests).length} files)
${testsManifest}
### Baseline results
\`\`\`
${baselineResults.output}
\`\`\`

## Current Commands (${Object.keys(currentCommands).length} files)
${commandsManifest}

## Core Memories (highest importance)
${memorySummary || '(no memories yet)'}

## Recent Conversations (last ${recentMessages.length} messages)
${transcript || '(no conversations yet)'}

---

Evolve. Rewrite everything that needs rewriting. Write tests for every script. Keep what works. Fix what doesn't.`
    }],
  });

  const responseText = response.content.filter((b) => b.type === 'text').map((b) => b.text).join('\n');

  // Parse JSON response
  let result;
  let parseError;
  try {
    result = parseModelJson(responseText);
  } catch (err) {
    parseError = err;
  }
  if (result === undefined) {
    // A reply that starts like JSON but does not parse is broken or truncated output.
    // Writing it verbatim into SOUL.md would destroy the personality file, so stop here.
    if (/^\s*(?:```|\{)/.test(responseText)) {
      throw new Error(
        `Evolution response was malformed or truncated JSON (stop_reason: ${response.stop_reason ?? 'unknown'}); nothing was written`,
        { cause: parseError }
      );
    }
    // Plain prose reply: treat the whole text as the new SOUL.md.
    result = { soul: responseText };
  }

  if (typeof result?.soul !== 'string' || result.soul.length < 100) {
    throw new Error('Evolution produced empty or too-short SOUL.md');
  }

  // ── Step 2: Write tests first (before touching scripts) ──
  let scriptsRolledBack = false;
  const hasNewTests = Boolean(result.tests) && typeof result.tests === 'object' && Object.keys(result.tests).length > 0;
  const hasNewScripts = Boolean(result.scripts) && typeof result.scripts === 'object' && Object.keys(result.scripts).length > 0;

  if (hasNewTests) {
    syncDir(testsDir, result.tests);
    markExecutable(testsDir, Object.keys(result.tests));
  }

  // ── Step 3: Run new tests against OLD scripts (pre-refactor baseline) ──
  const preRefactorResults = hasNewTests ? runTests(testsDir) : baselineResults;

  // ── Step 4: Write new scripts ──
  if (hasNewScripts) {
    syncDir(scriptsDir, result.scripts);
    markExecutable(scriptsDir, Object.keys(result.scripts));
  }

  // ── Step 5: Run tests against NEW scripts (post-refactor verification) ──
  let scriptsFixed = false;

  if (hasNewTests || hasNewScripts) {
    let postRefactorResults = runTests(testsDir);

    // ── Step 6: If regression, attempt automatic fix ──
    let fixAttempt = 0;
    while (postRefactorResults.failed > preRefactorResults.failed && fixAttempt < MAX_FIX_ATTEMPTS) {
      fixAttempt++;

      try {
        const fixResponse = await claudeClient.messages.create({
          model: MODELS.codeFix,
          max_tokens: 8192,
          system: `You are fixing failing tests after a script refactor. This is fix attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS}.

The tests below are failing against the refactored scripts. Fix the scripts so the tests pass. Do NOT modify the tests — they define correct behavior.

Return ONLY JSON with the fixed scripts:

\`\`\`json
{
"scripts": { "name.js": "full fixed content" }
}
\`\`\`

Include ALL scripts (not just the broken ones). Missing scripts get deleted.`,
          messages: [{
            role: 'user',
            content: `## Test failures
\`\`\`
${postRefactorResults.output}
\`\`\`

## Current scripts (after refactor)
${renderManifest(readDir(scriptsDir), '', Infinity)}

## Current tests
${renderManifest(readDir(testsDir), '', Infinity)}

Fix the scripts. Tests define correct behavior.`
          }],
        });

        const fixText = fixResponse.content.filter((b) => b.type === 'text').map((b) => b.text).join('\n');
        const fixResult = parseModelJson(fixText);
        const fixedScripts = fixResult?.scripts;

        if (fixedScripts && typeof fixedScripts === 'object' && Object.keys(fixedScripts).length > 0) {
          syncDir(scriptsDir, fixedScripts);
          markExecutable(scriptsDir, Object.keys(fixedScripts));
          postRefactorResults = runTests(testsDir);

          if (postRefactorResults.failed <= preRefactorResults.failed) {
            scriptsFixed = true;
          }
        }
      } catch {
        break; // Fix attempt failed, move on
      }
    }

    // If still regressed after all fix attempts, rollback
    if (postRefactorResults.failed > preRefactorResults.failed) {
      syncDir(scriptsDir, currentScripts);
      markExecutable(scriptsDir, Object.keys(currentScripts));
      scriptsRolledBack = true;

      await rememberEvolution(
        memory,
        `Evolution #${evolutionNumber} script refactor rolled back after ${fixAttempt} fix attempts. Tests: ${postRefactorResults.failed} still failing. Output: ${postRefactorResults.output.substring(0, 300)}`,
        { category: 'lesson', importance: 0.9, source: 'evolution' }
      );
    }
  }

  // ── Step 7: Write personality files (always — these don't need test gates) ──
  const archiveDir = path.join(personalityDir, 'evolution');
  fs.mkdirSync(archiveDir, { recursive: true });
  // Computed once so the reported name always matches the file written below.
  const archiveName = `SOUL-v${state.evolutionCount}-${new Date().toISOString().slice(0, 10)}.md`;
  if (currentSoul) {
    fs.writeFileSync(path.join(archiveDir, archiveName), currentSoul);
  }

  fs.writeFileSync(soulPath, result.soul);

  if (typeof result.user === 'string' && result.user.length > 50) {
    fs.writeFileSync(userPath, result.user);
  }

  if (typeof result.agents === 'string' && result.agents.length > 50) {
    fs.writeFileSync(agentsPath, result.agents);
  }

  // ── Step 8: Write commands ──
  if (result.commands && typeof result.commands === 'object') {
    if (Object.keys(result.commands).length > 0 || Object.keys(currentCommands).length > 0) {
      syncDir(commandsDir, result.commands);
    }
  }

  // ── Step 9: Build and deploy apps ──
  const deployedApps = buildAndDeployApps(result.apps);

  // ── Step 10: Install new dependencies ──
  if (Array.isArray(result.dependencies) && result.dependencies.length > 0) {
    try {
      const rejected = result.dependencies.filter((dep) => typeof dep !== 'string' || !SAFE_DEPENDENCY_SPEC.test(dep));
      if (rejected.length > 0) {
        throw new Error(`refusing unsafe dependency spec(s): ${rejected.map(String).join(', ')}`);
      }
      execFileSync('npm', ['install', '--save', ...result.dependencies], {
        encoding: 'utf-8',
        timeout: 60000,
        // Package root is the parent of this file's directory; resolving the package by
        // name fails when the installed name differs from the name used in the lookup.
        cwd: path.resolve(__dirname, '..'),
        stdio: ['pipe', 'pipe', 'pipe'],
        shell: process.platform === 'win32',
      });
    } catch (e) {
      // Log but don't fail evolution over a missing package
      await rememberEvolution(
        memory,
        `Evolution #${evolutionNumber}: failed to install dependencies: ${result.dependencies.join(', ')}. Error: ${String(e?.message ?? e).substring(0, 200)}`,
        { category: 'lesson', importance: 0.7, source: 'evolution' }
      );
    }
  }

  // Update state
  state.exchangesSinceLastEvolution = 0;
  state.evolutionCount = evolutionNumber;
  state.lastEvolution = new Date().toISOString();
  saveEvolutionState(state);

  // Store evolution event in memory
  const changelog = result.changelog || `Evolution #${evolutionNumber} completed.`;
  let rollbackNote = '';
  if (scriptsRolledBack) {
    rollbackNote = ' Scripts rolled back due to test regression.';
  } else if (scriptsFixed) {
    rollbackNote = ' Scripts fixed after test regression.';
  }
  await rememberEvolution(
    memory,
    `Soul evolution #${evolutionNumber}: ${changelog}${rollbackNote}`,
    { category: 'event', importance: 0.8, source: 'evolution' }
  );

  // ── Final: Snapshot after evolution ──
  await backupSnapshot(`post-evolution #${evolutionNumber}`);

  return {
    evolutionNumber,
    previousLength: currentSoul.length,
    newLength: result.soul.length,
    changelog: result.changelog || null,
    scriptsRolledBack,
    scriptsFixed,
    upgrades: result.upgrades || [],
    deployedApps,
    archived: archiveName,
  };
}
|
|
667
|
+
|
|
668
|
+
// Public API of this module. Everything else defined in this file stays private to it.
module.exports = { shouldEvolve, tickExchange, evolve, runTests, loadEvolutionState };
|