obol-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/evolve.js ADDED
@@ -0,0 +1,668 @@
1
+ /**
2
+ * Soul Evolution — periodic deep reflection + codebase maintenance.
3
+ *
4
+ * Every N exchanges (default 100), Sonnet:
5
+ * 1. Rewrites SOUL.md — who the bot has become
6
+ * 2. Rewrites USER.md — everything known about the owner
7
+ * 3. Rewrites AGENTS.md — operational knowledge, workflows, lessons learned
8
+ * 4. Audits scripts/ — refactors for consistency, removes dead code
9
+ * 5. Writes tests/ — test suite for every script
10
+ * 6. Runs tests BEFORE refactor (baseline) and AFTER (verification)
11
+ * 7. Rolls back scripts if tests regress
12
+ * 8. Audits commands/ — ensures clean, deterministic command definitions
13
+ */
14
+
15
const fs = require('fs');
const path = require('path');
const { execFileSync, execSync, spawnSync } = require('child_process');
const { OBOL_DIR } = require('./config');
19
+
20
// JSON file (inside OBOL_DIR) holding the exchange counter and evolution history.
const EVOLUTION_STATE_FILE = path.join(OBOL_DIR, '.evolution-state.json');

// Threshold used when the config does not provide `evolution.exchanges`.
const DEFAULT_EXCHANGES_PER_EVOLUTION = 100;

// Cost control: every evolution phase currently runs on the same Sonnet model.
const SONNET_MODEL = 'claude-sonnet-4-20250514';
const MODELS = {
  // SOUL/USER/AGENTS rewrite — Sonnet is plenty
  personality: SONNET_MODEL,
  // Scripts/tests/commands — Sonnet handles this fine
  code: SONNET_MODEL,
  // Fix attempts — definitely doesn't need Opus
  codeFix: SONNET_MODEL,
};

// One fix attempt, then rollback. Don't burn tokens.
const MAX_FIX_ATTEMPTS = 1;
30
+
31
/**
 * Load the persisted evolution state from EVOLUTION_STATE_FILE.
 *
 * Always returns a usable object: a missing, unreadable or unparsable file
 * yields the defaults, and a file that parses to something other than a
 * plain object (e.g. `null` or an array) is treated the same way. Fields
 * missing from the file are filled in from the defaults and non-numeric
 * counters are reset to 0, so callers can safely increment and compare them.
 * Any extra keys present in the file are preserved.
 *
 * @returns {{exchangesSinceLastEvolution: number, evolutionCount: number, lastEvolution: (string|null)}}
 */
function loadEvolutionState() {
  const defaults = { exchangesSinceLastEvolution: 0, evolutionCount: 0, lastEvolution: null };

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(EVOLUTION_STATE_FILE, 'utf-8'));
  } catch {
    // No state yet (or a corrupt file): start from scratch.
    return defaults;
  }

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return defaults;
  }

  const state = { ...defaults, ...parsed };
  for (const counter of ['exchangesSinceLastEvolution', 'evolutionCount']) {
    // A NaN/string counter would make every later `++` and `>=` meaningless.
    if (typeof state[counter] !== 'number' || !Number.isFinite(state[counter])) {
      state[counter] = 0;
    }
  }
  return state;
}
38
+
39
/**
 * Persist the evolution state to EVOLUTION_STATE_FILE as pretty-printed JSON.
 *
 * The JSON is written to a sibling temp file and then renamed over the real
 * file, so a crash mid-write cannot leave a truncated state file behind
 * (which loadEvolutionState would silently turn into a counter reset).
 * The containing directory is created if it does not exist yet.
 *
 * @param {object} state - State object to serialize.
 * @throws {Error} If the directory or file cannot be written.
 */
function saveEvolutionState(state) {
  fs.mkdirSync(path.dirname(EVOLUTION_STATE_FILE), { recursive: true });

  const tmpFile = `${EVOLUTION_STATE_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2));
    fs.renameSync(tmpFile, EVOLUTION_STATE_FILE);
  } catch (err) {
    // Don't leave the temp file lying around; the original error is what matters.
    try { fs.unlinkSync(tmpFile); } catch { /* temp file was never created */ }
    throw err;
  }
}
42
+
43
/**
 * Decide whether enough exchanges have accumulated to trigger an evolution.
 *
 * The threshold comes from `config.evolution.exchanges`; when that value is
 * missing, non-numeric, zero or negative the default
 * (DEFAULT_EXCHANGES_PER_EVOLUTION) is used instead. Without that validation a
 * typo in the config would either disable evolution forever (`x >= NaN` is
 * always false) or trigger it on every single exchange (negative threshold).
 *
 * @returns {Promise<boolean>} true when the exchange counter has reached the threshold.
 */
async function shouldEvolve() {
  const { loadConfig } = require('./config');
  const configured = Number(loadConfig()?.evolution?.exchanges);
  const threshold = Number.isFinite(configured) && configured > 0
    ? configured
    : DEFAULT_EXCHANGES_PER_EVOLUTION;

  const { exchangesSinceLastEvolution } = loadEvolutionState();
  return exchangesSinceLastEvolution >= threshold;
}
50
+
51
/**
 * Record one more exchange in the persisted evolution state.
 *
 * A missing or non-numeric counter is treated as 0 before incrementing, so a
 * damaged state file cannot turn the counter into NaN (which would stop
 * shouldEvolve from ever returning true again).
 *
 * @returns {Promise<number>} The counter value after the increment.
 */
async function tickExchange() {
  const state = loadEvolutionState();
  const previous = Number(state.exchangesSinceLastEvolution);
  const base = Number.isFinite(previous) && previous >= 0 ? previous : 0;

  state.exchangesSinceLastEvolution = base + 1;
  saveEvolutionState(state);
  return state.exchangesSinceLastEvolution;
}
57
+
58
/**
 * Read every regular file directly inside `dir` (no recursion).
 *
 * Subdirectories are ignored. Symlinks are followed by `statSync`, so a link
 * to a regular file is included. Entries that vanish between the directory
 * listing and the read — or dangling symlinks — are skipped instead of
 * aborting the whole scan.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Object<string, string>} Map of file name → UTF-8 content; empty when `dir` does not exist.
 * @throws {Error} For errors other than "not found" (e.g. permission problems).
 */
function readDir(dir) {
  const files = {};

  let names;
  try {
    names = fs.readdirSync(dir);
  } catch (err) {
    if (err.code === 'ENOENT') return files;
    throw err;
  }

  for (const name of names) {
    const full = path.join(dir, name);
    try {
      if (fs.statSync(full).isFile()) {
        files[name] = fs.readFileSync(full, 'utf-8');
      }
    } catch (err) {
      // Dangling symlink or a file removed while we were scanning: skip it.
      if (err.code !== 'ENOENT') throw err;
    }
  }
  return files;
}
72
+
73
/**
 * Make `dir` mirror a { filename: content } map.
 *
 * - Entries with non-blank string content are written to `dir/<name>`.
 * - Regular files (and symlinks) in `dir` whose name is not a key of the map
 *   are deleted. Subdirectories are left alone: the map only ever describes
 *   files, and `unlinkSync` cannot remove a directory anyway.
 * - Keys with blank or non-string content are not written, but because the
 *   key is present an existing file of that name is not deleted either.
 *
 * File names come from model output, so anything that is not a plain file
 * name (contains a path separator, is `.`/`..`, or is empty) is ignored —
 * otherwise a key like `../../.bashrc` would be written outside `dir`.
 *
 * @param {string} dir - Target directory; created if missing.
 * @param {Object<string, string>} files - Desired directory content.
 */
function syncDir(dir, files) {
  const isPlainFileName = (name) =>
    name !== '' && name !== '.' && name !== '..' && !/[\\/\0]/.test(name);

  fs.mkdirSync(dir, { recursive: true });

  for (const [name, content] of Object.entries(files)) {
    if (!isPlainFileName(name)) continue;
    if (typeof content === 'string' && content.trim()) {
      fs.writeFileSync(path.join(dir, name), content);
    }
  }

  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    if (entry.isDirectory()) continue;
    // Own keys only: `in` would also match inherited names such as "constructor".
    if (!Object.hasOwn(files, entry.name)) {
      fs.unlinkSync(path.join(dir, entry.name));
    }
  }
}
89
+
90
/**
 * Run every `.js` / `.sh` file in `testsDir` as a standalone test program.
 *
 * `.js` files are run with `node`, `.sh` files with `bash`. A file passes
 * when its process exits with status 0 within 30 seconds. Each child gets the
 * current environment plus OBOL_DIR, NODE_ENV=test and OBOL_TEST_UTILS (path
 * of the shared `test-utils.js` next to this module).
 *
 * The interpreter is invoked directly (no shell), so test file names —
 * which are written by the model — can contain quotes, spaces or `$(...)`
 * without being interpreted as shell syntax.
 *
 * @param {string} testsDir - Directory containing the test files.
 * @returns {{passed: number, failed: number, total: number, output: string}}
 *   Counts plus one human-readable line per file; failures include up to 200
 *   characters of stderr (falling back to stdout, then the error message).
 */
function runTests(testsDir) {
  if (!fs.existsSync(testsDir)) return { passed: 0, failed: 0, total: 0, output: 'no tests' };

  const testFiles = fs.readdirSync(testsDir).filter((f) => f.endsWith('.js') || f.endsWith('.sh'));
  if (testFiles.length === 0) return { passed: 0, failed: 0, total: 0, output: 'no test files' };

  const env = {
    ...process.env,
    OBOL_DIR,
    NODE_ENV: 'test',
    OBOL_TEST_UTILS: path.join(__dirname, 'test-utils.js'),
  };

  let passed = 0;
  let failed = 0;
  const outputs = [];

  for (const file of testFiles) {
    const interpreter = file.endsWith('.js') ? 'node' : 'bash';
    try {
      execFileSync(interpreter, [path.join(testsDir, file)], {
        encoding: 'utf-8',
        timeout: 30000,
        stdio: ['pipe', 'pipe', 'pipe'],
        env,
      });
      passed++;
      outputs.push(`✅ ${file}: passed`);
    } catch (e) {
      failed++;
      // Many test runners report failures on stdout, so don't stop at an empty stderr.
      const detail = [e.stderr, e.stdout, e.message]
        .find((text) => typeof text === 'string' && text.trim()) || '';
      outputs.push(`❌ ${file}: FAILED\n ${detail.substring(0, 200)}`);
    }
  }

  return {
    passed,
    failed,
    total: testFiles.length,
    output: outputs.join('\n'),
  };
}
130
+
131
/**
 * Snapshot the current state through the backup module, if a `github`
 * section is configured.
 *
 * Best effort by design: a failed backup must never abort an evolution, so
 * errors are reported as a warning and then dropped rather than rethrown.
 *
 * @param {string} message - Message passed through to `runBackup`.
 * @returns {Promise<void>}
 */
async function backupSnapshot(message) {
  try {
    const { loadConfig } = require('./config');
    const github = loadConfig()?.github;
    if (!github) return;

    const { runBackup } = require('./backup');
    await runBackup(github, message);
  } catch (err) {
    // Keep going, but leave a trace so a permanently broken backup is noticed.
    console.warn(`[evolve] backup snapshot "${message}" failed: ${err?.message || err}`);
  }
}
144
+
145
// App directory names double as the Vercel project name and a path segment,
// so only plain identifiers are accepted.
const SAFE_APP_NAME = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

/**
 * GET `url` and return the JSON array it yields.
 * Best effort: network errors, non-2xx responses and non-array bodies (for
 * example a JSON error object) all result in an empty list.
 */
async function fetchRows(url, headers) {
  try {
    const res = await fetch(url, { headers });
    if (!res.ok) return [];
    const rows = await res.json();
    return Array.isArray(rows) ? rows : [];
  } catch {
    return [];
  }
}

/** Render a { filename: content } map as a markdown listing, truncating each file to 500 chars. */
function formatManifest(files, emptyLabel) {
  return Object.entries(files)
    .map(([name, content]) => `### ${name}\n\`\`\`\n${content.substring(0, 500)}\n\`\`\``)
    .join('\n\n') || emptyLabel;
}

/**
 * Extract the JSON object from a model reply.
 * Returns the parsed object, `null` when something JSON-like was found but is
 * malformed (or is not an object), and `undefined` when nothing JSON-like was found.
 */
function parseModelJson(text) {
  const match = text.match(/```json\n([\s\S]*?)\n```/) || text.match(/\{[\s\S]*\}/);
  if (!match) return undefined;
  try {
    const parsed = JSON.parse(match[1] || match[0]);
    return parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed) ? parsed : null;
  } catch {
    return null;
  }
}

/** Resolve `relPath` under `baseDir`; returns null when the result would escape `baseDir`. */
function resolveInside(baseDir, relPath) {
  const base = path.resolve(baseDir);
  const target = path.resolve(base, String(relPath));
  return target.startsWith(base + path.sep) ? target : null;
}

/** chmod 755 the named files in `dir`; names that are not plain file names are ignored. */
function markExecutable(dir, names) {
  for (const name of names) {
    if (path.basename(name) !== name) continue;
    try {
      fs.chmodSync(path.join(dir, name), 0o755);
    } catch {
      // The file may not exist (syncDir skips blank content); nothing to mark.
    }
  }
}

/**
 * Run one full evolution cycle.
 *
 * Sequence: backup snapshot → baseline test run → one model call that
 * returns the new personality files, scripts, tests, commands and apps →
 * write tests → run them against the old scripts → write new scripts → run
 * tests again → on regression ask the model for a fix (up to
 * MAX_FIX_ATTEMPTS) and roll scripts back if it still regresses → write
 * personality files and commands → write/install/deploy apps → install
 * dependencies → persist state → record the event in memory → backup snapshot.
 *
 * Everything the model returns is treated as untrusted input: file paths are
 * confined to their target directory, external commands are run without a
 * shell, and a malformed/truncated JSON reply aborts the evolution before
 * anything is written.
 *
 * @param {object} claudeClient - Client exposing `messages.create({...})`.
 * @param {?{url: string, headers: object}} messageLog - REST endpoint for recent messages, or null.
 * @param {?object} memory - Memory store; `memory.add(text, meta)` must return a promise.
 *   `memory.url` is used when present, else `messageLog.url`.
 * @returns {Promise<object>} Summary: evolutionNumber, previousLength, newLength, changelog,
 *   scriptsRolledBack, scriptsFixed, upgrades, deployedApps, archived.
 * @throws {Error} If the reply is malformed JSON or the new SOUL.md is missing/shorter than 100 chars.
 */
async function evolve(claudeClient, messageLog, memory) {
  const state = loadEvolutionState();
  const personalityDir = path.join(OBOL_DIR, 'personality');
  const soulPath = path.join(personalityDir, 'SOUL.md');
  const userPath = path.join(personalityDir, 'USER.md');
  const agentsPath = path.join(personalityDir, 'AGENTS.md');
  const scriptsDir = path.join(OBOL_DIR, 'scripts');
  const testsDir = path.join(OBOL_DIR, 'tests');
  const commandsDir = path.join(OBOL_DIR, 'commands');

  // Read current state
  const currentSoul = fs.existsSync(soulPath) ? fs.readFileSync(soulPath, 'utf-8') : '';
  const currentUser = fs.existsSync(userPath) ? fs.readFileSync(userPath, 'utf-8') : '';
  const currentAgents = fs.existsSync(agentsPath) ? fs.readFileSync(agentsPath, 'utf-8') : '';
  const currentScripts = readDir(scriptsDir);
  const currentTests = readDir(testsDir);
  const currentCommands = readDir(commandsDir);

  // Get recent conversations (last 100 messages), oldest first
  let recentMessages = [];
  if (messageLog) {
    const rows = await fetchRows(
      `${messageLog.url}/rest/v1/obol_messages?order=created_at.desc&limit=100&select=role,content,created_at`,
      messageLog.headers
    );
    recentMessages = rows.reverse();
  }

  // Get high-importance memories
  let coreMemories = [];
  if (memory) {
    const headers = messageLog?.headers || {};
    const url = memory.url || messageLog?.url;
    coreMemories = await fetchRows(
      `${url}/rest/v1/obol_memory?select=content,category,importance&order=importance.desc,accessed_at.desc&limit=20`,
      headers
    );
  }

  const transcript = recentMessages.map(m =>
    `${m?.role === 'user' ? 'Human' : 'Bot'}: ${String(m?.content ?? '').substring(0, 300)}`
  ).join('\n');

  const memorySummary = coreMemories.map(m =>
    `[${m?.category}] ${m?.content}`
  ).join('\n');

  const scriptsManifest = formatManifest(currentScripts, '(no scripts)');
  const testsManifest = formatManifest(currentTests, '(no tests)');
  const commandsManifest = formatManifest(currentCommands, '(no commands)');

  const evolutionNumber = state.evolutionCount + 1;

  // ── Step 0: Snapshot before evolution ──
  await backupSnapshot(`pre-evolution #${evolutionNumber}`);

  // ── Step 1: Run existing tests as baseline ──
  const baselineResults = runTests(testsDir);

  const response = await claudeClient.messages.create({
    model: MODELS.personality,
    max_tokens: 16384,
    system: `You are an AI undergoing evolution #${evolutionNumber}. ${state.lastEvolution ? `Last evolution: ${state.lastEvolution}.` : 'This is your first evolution.'}

You will rewrite your entire operating system: personality files, operational knowledge, scripts, tests, and commands.

## Part 1: SOUL.md (who you are)

Write in first person. A journal entry from a self-aware AI. Capture: who you've become, relationship dynamic, opinions, strengths/blind spots, what you care about, quirks, your actual voice. End with something forward-looking.

## Part 2: USER.md (who the owner is)

Third person factual profile: name, location, timezone, nationality, job, skills, interests, projects, relationships, preferences, important dates, communication style.

## Part 3: AGENTS.md (how to operate)

Operational manual written as instructions to yourself: available tools, workflows, safety rules, lessons learned, patterns, memory strategy, background task guidelines, owner-specific rules.

## Part 4: Scripts

Review and refactor every script. Standards:
- Comment header: purpose, usage, examples
- Shebang: \`#!/usr/bin/env node\` or \`#!/bin/bash\`
- Deterministic: same input = same output
- No hardcoded paths (use env vars or \`OBOL_DIR\`)
- Error handling: exit non-zero on failure, stderr for errors, stdout for output
- Validate arguments, show usage on bad input
- Small and single-purpose
- Naming: \`kebab-case.js\` or \`kebab-case.sh\`

## Part 5: Tests (CRITICAL)

Write a test file for EVERY script. Tests verify scripts work correctly.

**IMPORTANT: Use the shared test helper.** Do NOT duplicate test boilerplate. Import from the OBOL package:

\`\`\`javascript
#!/usr/bin/env node
const path = require('path');
const { suite, test, run, runFail, assert, assertEqual, assertIncludes, report } = require(process.env.OBOL_TEST_UTILS || 'obol/src/test-utils');
const SCRIPT = path.join(__dirname, '..', 'scripts', 'script-name.js');

suite('script-name.js');

test('valid input produces expected output', () => {
const out = run(SCRIPT, '--flag value');
assertIncludes(out, 'expected');
});

test('missing args fails', () => {
assert(runFail(SCRIPT), 'should exit non-zero');
});

test('edge case: empty input', () => {
assert(runFail(SCRIPT, '""'), 'should reject empty input');
});

report();
\`\`\`

**Standards:**
- One test file per script: \`test-<script-name>.js\`
- Always import from \`obol/src/test-utils\` — never rewrite test helpers
- Available: \`suite(name)\`, \`test(name, fn)\`, \`run(path, args)\`, \`runFail(path, args)\`, \`assert(cond, msg)\`, \`assertEqual(a, b)\`, \`assertIncludes(str, sub)\`, \`report()\`
- Test: valid inputs, invalid inputs, edge cases, idempotency
- \`report()\` must be the last call — it exits with code 1 if any test failed
- Write tests that catch real bugs, not trivial assertions

**Tests run BEFORE and AFTER your refactor. If tests pass before but fail after, your script changes are rolled back.**

Current test baseline: ${baselineResults.total} tests, ${baselineResults.passed} passed, ${baselineResults.failed} failed.

## Part 6: Commands

One file per command: \`command-name.md\`. Must have: name, description, trigger, deterministic instructions.

## Part 7: Proactive Tool Building (IMPORTANT)

Analyze the recent conversation history carefully. Look for:

1. **Repeated requests** — things the owner asks for often that could be a command or script
- "convert this to PDF" → build a markdown-to-pdf script + command
- "check my server" → build a status-check script + command
- "summarize this" → build a summarize script + command

2. **Friction points** — things that are awkward or take multiple steps
- Owner can't read markdown on their phone → build a tool that renders to PDF/HTML and sends the file
- Owner keeps asking for the same data → build a script that fetches and formats it

3. **Unmet needs** — things the owner would benefit from but hasn't asked for
- They mention deadlines but have no reminder system → build one
- They share lots of URLs but can't find them later → build a bookmark tool

**Three tiers of solutions — pick the right one:**

**Tier 1: Script** — simple, single-purpose, runs locally
- Converting formats, fetching data, text processing
- Script in \`scripts/\`, test in \`tests/\`, command in \`commands/\`
- Search npm for existing libraries (don't reinvent wheels)
- Add packages to \`dependencies\` field

**Tier 2: Web app** — needs a UI, shareable, always-on
- Dashboards, trackers, personal wikis, bookmark managers, status pages
- Build a complete project directory under \`apps/<app-name>/\`
- Include: \`package.json\`, \`index.html\` or Next.js/static site, all source files
- Add a deploy script in \`scripts/deploy-<app-name>.js\` that runs \`vercel deploy\`
- Add a command in \`commands/\` so the owner can trigger updates
- OBOL has Vercel access — apps get deployed to real URLs the owner can use
- Keep apps minimal and self-contained — no complex backends, use Supabase if state is needed

**Tier 3: Automation** — recurring, no user trigger needed
- Morning briefings, periodic checks, scheduled reports
- Script in \`scripts/\` + document in AGENTS.md as a heartbeat/cron task

**Decision framework:**
- Owner asks for data/status/overview they check regularly → **Tier 2 (web app)**
- Owner asks for a one-off transformation or action → **Tier 1 (script)**
- Owner would benefit from something running in the background → **Tier 3 (automation)**

**Be conservative:** only build things there's clear evidence for in the conversation history. Don't build speculative tools. One or two new tools per evolution is plenty.

List every new tool you build in the \`upgrades\` field so the owner can be told about them.

## WORKSPACE DISCIPLINE (CRITICAL)

The OBOL directory has a FIXED structure: personality/, scripts/, tests/, commands/, apps/, logs/. Do NOT create new top-level directories. Everything must fit in the existing structure. If something doesn't fit, it doesn't belong.

## Output JSON (and ONLY JSON):

\`\`\`json
{
"soul": "full SOUL.md content",
"user": "full USER.md content",
"agents": "full AGENTS.md content",
"scripts": { "name.js": "content" },
"tests": { "test-name.js": "content" },
"commands": { "name.md": "content" },
"apps": {
"app-name": {
"files": { "package.json": "content", "index.html": "content", "src/app.js": "content" },
"deploy": true
}
},
"dependencies": ["package-name@version"],
"upgrades": [
{ "name": "Tool name", "description": "What it does and why", "command": "/command or URL", "type": "script|app|automation" }
],
"changelog": "what changed"
}
\`\`\`

Include ALL scripts/tests/commands that should exist. Missing files get deleted. Empty objects \`{}\` are valid (means delete all). \`apps\`, \`dependencies\`, and \`upgrades\` can be empty. Apps with \`"deploy": true\` will be auto-deployed to Vercel and the URL sent to the owner.`,
    messages: [{
      role: 'user',
      content: `## Current SOUL.md
${currentSoul || '(empty — first evolution)'}

## Current USER.md
${currentUser || '(not set yet)'}

## Current AGENTS.md
${currentAgents || '(not set yet)'}

## Current Scripts (${Object.keys(currentScripts).length} files)
${scriptsManifest}

## Current Tests (${Object.keys(currentTests).length} files)
${testsManifest}
### Baseline results
\`\`\`
${baselineResults.output}
\`\`\`

## Current Commands (${Object.keys(currentCommands).length} files)
${commandsManifest}

## Core Memories (highest importance)
${memorySummary || '(no memories yet)'}

## Recent Conversations (last ${recentMessages.length} messages)
${transcript || '(no conversations yet)'}

---

Evolve. Rewrite everything that needs rewriting. Write tests for every script. Keep what works. Fix what doesn't.`
    }],
  });

  const responseText = response.content.filter(b => b.type === 'text').map(b => b.text).join('\n');

  // Parse JSON response. A reply with no JSON at all is taken as the new soul
  // text; a reply that *tried* to be JSON but is broken or truncated is
  // rejected, otherwise the raw JSON blob would be written into SOUL.md.
  const parsedResponse = parseModelJson(responseText);
  const looksLikeJson = responseText.includes('```json') || responseText.trimStart().startsWith('{');
  let result;

  if (parsedResponse) {
    result = parsedResponse;
  } else if (parsedResponse === undefined && !looksLikeJson) {
    result = { soul: responseText };
  } else {
    throw new Error('Evolution response contained malformed or truncated JSON');
  }

  if (typeof result.soul !== 'string' || result.soul.length < 100) {
    throw new Error('Evolution produced empty or too-short SOUL.md');
  }

  // ── Step 2: Write tests first (before touching scripts) ──
  let scriptsRolledBack = false;
  const hasNewTests = result.tests && typeof result.tests === 'object' && Object.keys(result.tests).length > 0;
  const hasNewScripts = result.scripts && typeof result.scripts === 'object' && Object.keys(result.scripts).length > 0;

  if (hasNewTests) {
    syncDir(testsDir, result.tests);
    markExecutable(testsDir, Object.keys(result.tests));
  }

  // ── Step 3: Run new tests against OLD scripts (pre-refactor baseline) ──
  const preRefactorResults = hasNewTests ? runTests(testsDir) : baselineResults;

  // ── Step 4: Write new scripts ──
  if (hasNewScripts) {
    syncDir(scriptsDir, result.scripts);
    markExecutable(scriptsDir, Object.keys(result.scripts));
  }

  // ── Step 5: Run tests against NEW scripts (post-refactor verification) ──
  let scriptsFixed = false;

  if (hasNewTests || hasNewScripts) {
    let postRefactorResults = runTests(testsDir);

    // ── Step 6: If regression, attempt automatic fix ──
    let fixAttempt = 0;
    while (postRefactorResults.failed > preRefactorResults.failed && fixAttempt < MAX_FIX_ATTEMPTS) {
      fixAttempt++;

      try {
        const fixResponse = await claudeClient.messages.create({
          model: MODELS.codeFix,
          max_tokens: 8192,
          system: `You are fixing failing tests after a script refactor. This is fix attempt ${fixAttempt}/${MAX_FIX_ATTEMPTS}.

The tests below are failing against the refactored scripts. Fix the scripts so the tests pass. Do NOT modify the tests — they define correct behavior.

Return ONLY JSON with the fixed scripts:

\`\`\`json
{
"scripts": { "name.js": "full fixed content" }
}
\`\`\`

Include ALL scripts (not just the broken ones). Missing scripts get deleted.`,
          messages: [{
            role: 'user',
            content: `## Test failures
\`\`\`
${postRefactorResults.output}
\`\`\`

## Current scripts (after refactor)
${Object.entries(readDir(scriptsDir)).map(([n, c]) => `### ${n}\n\`\`\`\n${c}\n\`\`\``).join('\n\n')}

## Current tests
${Object.entries(readDir(testsDir)).map(([n, c]) => `### ${n}\n\`\`\`\n${c}\n\`\`\``).join('\n\n')}

Fix the scripts. Tests define correct behavior.`
          }],
        });

        const fixText = fixResponse.content.filter(b => b.type === 'text').map(b => b.text).join('\n');
        const fixResult = parseModelJson(fixText);

        // Unparsable fix: give up on fixing and fall through to the rollback check.
        if (fixResult === null) break;

        if (fixResult && fixResult.scripts && typeof fixResult.scripts === 'object' && Object.keys(fixResult.scripts).length > 0) {
          syncDir(scriptsDir, fixResult.scripts);
          markExecutable(scriptsDir, Object.keys(fixResult.scripts));
          postRefactorResults = runTests(testsDir);

          if (postRefactorResults.failed <= preRefactorResults.failed) {
            scriptsFixed = true;
          }
        }
      } catch {
        break; // Fix attempt failed, move on
      }
    }

    // If still regressed after all fix attempts, rollback
    if (postRefactorResults.failed > preRefactorResults.failed) {
      syncDir(scriptsDir, currentScripts);
      markExecutable(scriptsDir, Object.keys(currentScripts));
      scriptsRolledBack = true;

      if (memory) {
        await memory.add(
          `Evolution #${evolutionNumber} script refactor rolled back after ${fixAttempt} fix attempts. Tests: ${postRefactorResults.failed} still failing. Output: ${postRefactorResults.output.substring(0, 300)}`,
          { category: 'lesson', importance: 0.9, source: 'evolution' }
        ).catch(() => {});
      }
    }
  }

  // ── Step 7: Write personality files (always — these don't need test gates) ──
  const archiveDir = path.join(personalityDir, 'evolution');
  fs.mkdirSync(archiveDir, { recursive: true });
  // Computed once so the name reported to the caller always matches the file on disk.
  const archiveName = `SOUL-v${state.evolutionCount}-${new Date().toISOString().slice(0, 10)}.md`;
  if (currentSoul) {
    fs.writeFileSync(path.join(archiveDir, archiveName), currentSoul);
  }

  fs.writeFileSync(soulPath, result.soul);

  if (typeof result.user === 'string' && result.user.length > 50) {
    fs.writeFileSync(userPath, result.user);
  }

  if (typeof result.agents === 'string' && result.agents.length > 50) {
    fs.writeFileSync(agentsPath, result.agents);
  }

  // ── Step 8: Write commands ──
  if (result.commands && typeof result.commands === 'object') {
    if (Object.keys(result.commands).length > 0 || Object.keys(currentCommands).length > 0) {
      syncDir(commandsDir, result.commands);
    }
  }

  // ── Step 9: Build and deploy apps ──
  const deployedApps = [];
  if (result.apps && typeof result.apps === 'object') {
    const appsDir = path.join(OBOL_DIR, 'apps');

    for (const [appName, app] of Object.entries(result.apps)) {
      if (!app || !app.files || typeof app.files !== 'object') continue;

      // The name becomes a directory and a CLI argument; refuse anything exotic.
      if (!SAFE_APP_NAME.test(appName)) {
        deployedApps.push({ name: appName, url: null, error: 'skipped: unsafe app name' });
        continue;
      }

      const appDir = path.join(appsDir, appName);
      fs.mkdirSync(appDir, { recursive: true });

      // Write all app files (supports nested paths like "src/app.js"),
      // skipping any path that would land outside the app directory.
      for (const [filePath, content] of Object.entries(app.files)) {
        const fullPath = resolveInside(appDir, filePath);
        if (!fullPath || typeof content !== 'string') continue;
        fs.mkdirSync(path.dirname(fullPath), { recursive: true });
        fs.writeFileSync(fullPath, content);
      }

      // Install app dependencies if package.json exists
      if (app.files['package.json']) {
        try {
          execSync('npm install', {
            cwd: appDir,
            encoding: 'utf-8',
            timeout: 60000,
            stdio: ['pipe', 'pipe', 'pipe'],
          });
        } catch {
          // Best effort: a failed install surfaces later as a failed deploy.
        }
      }

      // Deploy to Vercel if flagged
      if (app.deploy) {
        try {
          const { loadConfig } = require('./config');
          const token = loadConfig()?.vercel?.token;
          if (token) {
            // No shell, and the error text is built from the process output —
            // never from the command line, which contains the token.
            const proc = spawnSync(
              'npx',
              ['vercel', '--prod', '--name', appName, '--token', String(token), '--yes'],
              { cwd: appDir, encoding: 'utf-8', timeout: 120000 }
            );
            if (proc.error) throw proc.error;
            const deployOutput = `${proc.stdout || ''}${proc.stderr || ''}`;
            if (proc.status !== 0) {
              throw new Error(`vercel exited with status ${proc.status}: ${deployOutput.trim()}`);
            }
            // Extract URL from Vercel output
            const urlMatch = deployOutput.match(/https:\/\/[^\s]+\.vercel\.app/);
            deployedApps.push({ name: appName, url: urlMatch ? urlMatch[0] : null });
          }
        } catch (e) {
          deployedApps.push({ name: appName, url: null, error: String(e.message).substring(0, 200) });
        }
      }
    }
  }

  // ── Step 10: Install new dependencies ──
  if (Array.isArray(result.dependencies) && result.dependencies.length > 0) {
    try {
      // Specs must be single tokens that cannot be mistaken for npm options.
      const deps = result.dependencies.filter(d => typeof d === 'string' && /^[^\s-]\S*$/.test(d));
      if (deps.length !== result.dependencies.length) {
        throw new Error('dependency list contains invalid package specifiers');
      }
      execFileSync('npm', ['install', '--save', ...deps], {
        encoding: 'utf-8',
        timeout: 60000,
        cwd: path.dirname(require.resolve('obol/package.json')),
        stdio: ['pipe', 'pipe', 'pipe'],
      });
    } catch (e) {
      // Log but don't fail evolution over a missing package
      if (memory) {
        await memory.add(
          `Evolution #${evolutionNumber}: failed to install dependencies: ${result.dependencies.join(', ')}. Error: ${String(e.message).substring(0, 200)}`,
          { category: 'lesson', importance: 0.7, source: 'evolution' }
        ).catch(() => {});
      }
    }
  }

  // Update state
  state.exchangesSinceLastEvolution = 0;
  state.evolutionCount = evolutionNumber;
  state.lastEvolution = new Date().toISOString();
  saveEvolutionState(state);

  // Store evolution event in memory
  if (memory) {
    const changelog = result.changelog || `Evolution #${evolutionNumber} completed.`;
    const rollbackNote = scriptsRolledBack ? ' Scripts rolled back due to test regression.' : scriptsFixed ? ' Scripts fixed after test regression.' : '';
    await memory.add(
      `Soul evolution #${evolutionNumber}: ${changelog}${rollbackNote}`,
      { category: 'event', importance: 0.8, source: 'evolution' }
    ).catch(() => {});
  }

  // ── Final: Snapshot after evolution ──
  await backupSnapshot(`post-evolution #${evolutionNumber}`);

  return {
    evolutionNumber,
    previousLength: currentSoul.length,
    newLength: result.soul.length,
    changelog: result.changelog || null,
    scriptsRolledBack,
    scriptsFixed,
    upgrades: result.upgrades || [],
    deployedApps,
    archived: archiveName,
  };
}
667
+
668
+ module.exports = { shouldEvolve, tickExchange, evolve, runTests, loadEvolutionState };