@blockrun/franklin 3.3.3 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +55 -4
  2. package/dist/agent/commands.d.ts +1 -1
  3. package/dist/agent/commands.js +128 -17
  4. package/dist/agent/compact.d.ts +2 -2
  5. package/dist/agent/compact.js +148 -22
  6. package/dist/agent/context.d.ts +8 -3
  7. package/dist/agent/context.js +301 -108
  8. package/dist/agent/error-classifier.d.ts +11 -2
  9. package/dist/agent/error-classifier.js +64 -10
  10. package/dist/agent/llm.d.ts +8 -1
  11. package/dist/agent/llm.js +114 -19
  12. package/dist/agent/loop.d.ts +1 -2
  13. package/dist/agent/loop.js +509 -61
  14. package/dist/agent/optimize.d.ts +2 -2
  15. package/dist/agent/optimize.js +9 -7
  16. package/dist/agent/permissions.d.ts +1 -1
  17. package/dist/agent/permissions.js +1 -1
  18. package/dist/agent/planner.d.ts +42 -0
  19. package/dist/agent/planner.js +110 -0
  20. package/dist/agent/reduce.d.ts +7 -1
  21. package/dist/agent/reduce.js +85 -3
  22. package/dist/agent/streaming-executor.d.ts +6 -1
  23. package/dist/agent/streaming-executor.js +83 -5
  24. package/dist/agent/tokens.d.ts +11 -2
  25. package/dist/agent/tokens.js +38 -5
  26. package/dist/agent/tool-guard.d.ts +27 -0
  27. package/dist/agent/tool-guard.js +324 -0
  28. package/dist/agent/types.d.ts +7 -1
  29. package/dist/agent/types.js +1 -1
  30. package/dist/brain/extract.d.ts +11 -0
  31. package/dist/brain/extract.js +154 -0
  32. package/dist/brain/index.d.ts +3 -0
  33. package/dist/brain/index.js +2 -0
  34. package/dist/brain/store.d.ts +42 -0
  35. package/dist/brain/store.js +225 -0
  36. package/dist/brain/types.d.ts +45 -0
  37. package/dist/brain/types.js +5 -0
  38. package/dist/commands/daemon.js +2 -1
  39. package/dist/commands/start.js +16 -3
  40. package/dist/config.js +1 -1
  41. package/dist/index.js +27 -2
  42. package/dist/learnings/extractor.d.ts +13 -0
  43. package/dist/learnings/extractor.js +69 -8
  44. package/dist/learnings/index.d.ts +1 -1
  45. package/dist/learnings/index.js +1 -1
  46. package/dist/learnings/store.js +42 -13
  47. package/dist/learnings/types.d.ts +1 -1
  48. package/dist/mcp/client.d.ts +1 -1
  49. package/dist/mcp/client.js +5 -5
  50. package/dist/mcp/config.d.ts +1 -1
  51. package/dist/mcp/config.js +1 -1
  52. package/dist/panel/html.d.ts +2 -0
  53. package/dist/panel/html.js +409 -146
  54. package/dist/panel/server.js +19 -0
  55. package/dist/pricing.js +3 -2
  56. package/dist/proxy/fallback.d.ts +3 -1
  57. package/dist/proxy/fallback.js +4 -4
  58. package/dist/proxy/server.js +29 -11
  59. package/dist/proxy/sse-translator.js +1 -1
  60. package/dist/router/categories.d.ts +21 -0
  61. package/dist/router/categories.js +96 -0
  62. package/dist/router/index.d.ts +9 -2
  63. package/dist/router/index.js +106 -27
  64. package/dist/router/local-elo.d.ts +32 -0
  65. package/dist/router/local-elo.js +107 -0
  66. package/dist/router/selector.d.ts +46 -0
  67. package/dist/router/selector.js +106 -0
  68. package/dist/session/storage.d.ts +5 -1
  69. package/dist/session/storage.js +24 -2
  70. package/dist/social/a11y.d.ts +1 -1
  71. package/dist/social/a11y.js +5 -1
  72. package/dist/social/browser.d.ts +5 -0
  73. package/dist/social/browser.js +22 -0
  74. package/dist/social/preflight.d.ts +4 -0
  75. package/dist/social/preflight.js +42 -3
  76. package/dist/stats/failures.d.ts +20 -0
  77. package/dist/stats/failures.js +63 -0
  78. package/dist/stats/format.d.ts +6 -0
  79. package/dist/stats/format.js +23 -0
  80. package/dist/stats/insights.js +1 -21
  81. package/dist/stats/session-tracker.d.ts +21 -0
  82. package/dist/stats/session-tracker.js +28 -0
  83. package/dist/stats/tracker.d.ts +1 -1
  84. package/dist/stats/tracker.js +1 -1
  85. package/dist/tools/bash.d.ts +14 -1
  86. package/dist/tools/bash.js +132 -7
  87. package/dist/tools/edit.js +77 -14
  88. package/dist/tools/glob.js +13 -3
  89. package/dist/tools/grep.js +30 -12
  90. package/dist/tools/imagegen.js +3 -3
  91. package/dist/tools/index.d.ts +1 -1
  92. package/dist/tools/index.js +5 -1
  93. package/dist/tools/read.d.ts +16 -2
  94. package/dist/tools/read.js +36 -8
  95. package/dist/tools/searchx.d.ts +6 -2
  96. package/dist/tools/searchx.js +221 -44
  97. package/dist/tools/subagent.js +37 -3
  98. package/dist/tools/task.js +43 -7
  99. package/dist/tools/validate.d.ts +11 -0
  100. package/dist/tools/validate.js +42 -0
  101. package/dist/tools/webfetch.js +18 -7
  102. package/dist/tools/websearch.js +41 -7
  103. package/dist/tools/write.js +26 -6
  104. package/dist/ui/app.js +31 -6
  105. package/dist/ui/model-picker.d.ts +1 -1
  106. package/dist/ui/model-picker.js +1 -1
  107. package/dist/ui/terminal.d.ts +1 -1
  108. package/dist/ui/terminal.js +1 -1
  109. package/package.json +2 -2
package/README.md CHANGED
@@ -29,6 +29,7 @@
29
29
  <a href="#quick-start">Quick&nbsp;start</a> ·
30
30
  <a href="#a-new-category">New&nbsp;category</a> ·
31
31
  <a href="#what-franklin-can-execute">What&nbsp;it&nbsp;does</a> ·
32
+ <a href="#smart-router">Smart&nbsp;Router</a> ·
32
33
  <a href="#the-comparison">Compare</a> ·
33
34
  <a href="#features">Features</a> ·
34
35
  <a href="#how-it-works">Architecture</a> ·
@@ -179,6 +180,52 @@ Every tool call is itemized. Every token is priced. When the wallet hits zero, F
179
180
 
180
181
  ---
181
182
 
183
+ ## Smart Router
184
+
185
+ **55+ models. One decision. Zero guesswork.**
186
+
187
+ You don't pick models. Franklin picks for you.
188
+
189
+ The Smart Router classifies every request — coding, trading, reasoning, research — and selects the model with the best quality-to-cost ratio. Trained on **2M+ real requests** from the BlockRun gateway, continuously updated.
190
+
191
+ ```text
192
+ > refactor this auth module to use JWT
193
+
194
+ CODING kimi-k2.5 · 12.4K in / 2.1K out · $0.0023 saved 84%
195
+
196
+ > what's the BTC outlook for the week?
197
+
198
+ TRADING grok-4-1-fast-reasoning · 8.2K in / 1.8K out · $0.0008 saved 95%
199
+
200
+ > prove that this algorithm is O(n log n)
201
+
202
+ REASONING claude-sonnet-4.6 · 15.1K in / 3.4K out · $0.0312
203
+ ```
204
+
205
+ Every response shows which model was chosen, why, and how much you saved vs. always using the most expensive option.
206
+
207
+ **Four profiles:**
208
+
209
+ | Profile | Strategy | Use case |
210
+ |---------|----------|----------|
211
+ | `auto` | Best quality-to-cost ratio | Default — smart spend |
212
+ | `eco` | Cheapest model with decent quality | Budget-conscious |
213
+ | `premium` | Highest quality regardless of cost | Mission-critical |
214
+ | `free` | Free NVIDIA models only | Zero wallet balance |
215
+
216
+ **Per-session breakdown** — run `/cost` to see exactly where your USDC went:
217
+
218
+ ```text
219
+ Session Cost: $0.0847 (23 requests)
220
+ gemini-2.5-flash $0.0012 14 req CODING
221
+ kimi-k2.5 $0.0423 6 req CODING
222
+ claude-sonnet-4.6 $0.0412 3 req REASONING
223
+ ```
224
+
225
+ The router also learns from **your** usage. If you keep retrying a model for coding tasks, Franklin adapts and picks a better one next time. Your router gets smarter the more you use it.
226
+
227
+ ---
228
+
182
229
  ## Why Franklin
183
230
 
184
231
  <table>
@@ -215,6 +262,7 @@ Marketing, trading, research, code, and anything else you can express as tools p
215
262
  | ------------------------------------ | --------------- | ---------------- | ---------------- | ------------------------------- |
216
263
  | Main unit of value | Answers | Code changes | Fixed automations| **Budgeted outcomes** |
217
264
  | Has purchasing power | ❌ | ❌ | ❌ | ✅ **wallet-native** |
265
+ | Picks best model per task | ❌ | ❌ | ❌ | ✅ **learned router** |
218
266
  | Can choose tools/models per step | ⚠️ limited | ✅ mostly coding | ❌ usually fixed | ✅ **yes** |
219
267
  | Works across marketing/trading/code | ⚠️ | ❌ code-first | ⚠️ integration-bound | ✅ **cross-vertical** |
220
268
  | Hard spend cap | ❌ | ❌ | ⚠️ external billing | ✅ **wallet balance** |
@@ -247,8 +295,8 @@ Anthropic, OpenAI, Google, xAI, DeepSeek, GLM, Kimi, Minimax, NVIDIA free tier.
247
295
  **💳 x402 micropayments**
248
296
  HTTP 402 native. Every paid action is a signed micropayment against your USDC balance. No subscriptions. No refund loop. No account lock-in.
249
297
 
250
- **🚦 Smart spend routing**
251
- Free / cheap / premium per step. Franklin picks the cheapest model that can do the job, then escalates when quality matters.
298
+ **🧠 Learned model router**
299
+ Trained on 2M+ real requests. Classifies your task and picks the best model from 55+ LLMs. Four profiles (auto/eco/premium/free). Adapts to your usage over time.
252
300
 
253
301
  </td>
254
302
  <td width="50%" valign="top">
@@ -301,7 +349,10 @@ Core is workflow-agnostic. Add new verticals without touching the loop. Discover
301
349
  ```text
302
350
  ┌──────────────────────────────────────────────────────────────┐
303
351
  │ Franklin Runtime │
304
- │ Intent → Routing → Tool Use → Spend Decisions → Result
352
+ │ Intent → Smart Router → Tool Use → Spend Control → Result
353
+ ├──────────────────────────────────────────────────────────────┤
354
+ │ Learned Router │
355
+ │ 2M+ requests · 55+ models · category detection · Elo scores │
305
356
  ├──────────────────────────────────────────────────────────────┤
306
357
  │ Agent Loop │
307
358
  │ 16 tools · Sessions · Compaction · Pricing · Plugin SDK │
@@ -349,7 +400,7 @@ src/
349
400
  ├── stats/ Usage tracking + insights engine
350
401
  ├── ui/ Ink-based terminal UI
351
402
  ├── proxy/ Payment proxy for external tools
352
- ├── router/ Smart model routing (free/cheap/premium)
403
+ ├── router/ Learned model router (2M+ requests, Elo scoring)
353
404
  ├── wallet/ Wallet management (Base + Solana)
354
405
  ├── mcp/ MCP server auto-discovery
355
406
  └── commands/ CLI subcommands
package/dist/agent/commands.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Slash command registry for runcode.
2
+ * Slash command registry for Franklin.
3
3
  * Extracted from loop.ts for maintainability.
4
4
  *
5
5
  * Two types of commands:
package/dist/agent/commands.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Slash command registry for runcode.
2
+ * Slash command registry for Franklin.
3
3
  * Extracted from loop.ts for maintainability.
4
4
  *
5
5
  * Two types of commands:
@@ -104,8 +104,13 @@ function extractText(msg) {
104
104
  // ─── Command Definitions ──────────────────────────────────────────────────
105
105
  // Direct-handled commands (don't go to agent)
106
106
  const DIRECT_COMMANDS = {
107
+ '/noplan': (ctx) => {
108
+ ctx.config.planDisabled = true;
109
+ ctx.onEvent({ kind: 'text_delta', text: 'Plan-then-execute disabled for this session. Complex tasks will use a single model.\n' });
110
+ emitDone(ctx);
111
+ },
107
112
  '/stash': (ctx) => {
108
- const r = gitCmd(ctx, 'git stash push -m "runcode auto-stash"', 10000);
113
+ const r = gitCmd(ctx, 'git stash push -m "franklin auto-stash"', 10000);
109
114
  if (r !== null)
110
115
  ctx.onEvent({ kind: 'text_delta', text: r ? `${r}\n` : 'No changes to stash.\n' });
111
116
  emitDone(ctx);
@@ -197,8 +202,8 @@ const DIRECT_COMMANDS = {
197
202
  ` **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
198
203
  ` **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
199
204
  ` **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /session-search /context /tasks\n` +
200
- ` **Power:** /ultrathink [query] /ultraplan /dump\n` +
201
- ` **Info:** /model /wallet /cost /tokens /learnings /mcp /doctor /version /bug /help\n` +
205
+ ` **Power:** /ultrathink [query] /ultraplan /noplan /dump\n` +
206
+ ` **Info:** /model /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
202
207
  ` **UI:** /clear /exit\n` +
203
208
  (ultrathinkOn ? `\n Ultrathink: ON\n` : '')
204
209
  });
@@ -225,7 +230,7 @@ const DIRECT_COMMANDS = {
225
230
  emitDone(ctx);
226
231
  },
227
232
  '/bug': (ctx) => {
228
- ctx.onEvent({ kind: 'text_delta', text: 'Report issues at: https://github.com/BlockRunAI/runcode/issues\n' });
233
+ ctx.onEvent({ kind: 'text_delta', text: 'Report issues at: https://github.com/BlockRunAI/Franklin/issues\n' });
229
234
  emitDone(ctx);
230
235
  },
231
236
  '/version': (ctx) => {
@@ -340,34 +345,57 @@ const DIRECT_COMMANDS = {
340
345
  }
341
346
  emitDone(ctx);
342
347
  },
343
- '/sessions': (ctx) => {
348
+ '/sessions': async (ctx) => {
344
349
  const sessions = listSessions();
345
350
  if (sessions.length === 0) {
346
351
  ctx.onEvent({ kind: 'text_delta', text: 'No saved sessions.\n' });
347
352
  }
348
353
  else {
354
+ const { formatTokens, formatUsd, shortModelName } = await import('../stats/format.js');
349
355
  let text = `**${sessions.length} saved sessions:**\n\n`;
350
356
  for (const s of sessions.slice(0, 10)) {
351
357
  const date = new Date(s.updatedAt).toLocaleString();
352
- const dir = s.workDir ? ` — ${s.workDir.split('/').pop()}` : '';
353
- const current = s.id === ctx.sessionId ? ' (current)' : '';
354
- text += ` ${s.id} ${s.model} ${s.turnCount} turns ${date}${dir}${current}\n`;
358
+ const dir = s.workDir ? path.basename(s.workDir) : '';
359
+ const current = s.id === ctx.sessionId ? ' (current)' : '';
360
+ const model = shortModelName(s.model);
361
+ const tokens = (s.inputTokens || s.outputTokens)
362
+ ? ` ${formatTokens(s.inputTokens ?? 0)} in / ${formatTokens(s.outputTokens ?? 0)} out`
363
+ : '';
364
+ const cost = s.costUsd ? ` ${formatUsd(s.costUsd)}` : '';
365
+ const saved = s.savedVsOpusUsd && s.savedVsOpusUsd > 0.001
366
+ ? ` saved ${formatUsd(s.savedVsOpusUsd)}`
367
+ : '';
368
+ text += ` ${model} — ${s.messageCount} messages${tokens}${cost}${saved}\n`;
369
+ text += ` ${date} · ${dir}${current}\n\n`;
355
370
  }
356
371
  if (sessions.length > 10)
357
372
  text += ` ... and ${sessions.length - 10} more\n`;
358
- text += '\nUse /resume to restore the latest session, or /resume <session-id> for a specific one.\n';
373
+ text += 'Use /resume to restore the latest session, or /resume <session-id> for a specific one.\n';
359
374
  ctx.onEvent({ kind: 'text_delta', text });
360
375
  }
361
376
  emitDone(ctx);
362
377
  },
363
378
  '/cost': async (ctx) => {
364
379
  const { stats, saved } = getStatsSummary();
365
- ctx.onEvent({ kind: 'text_delta', text: `**Session Cost**\n` +
366
- ` Requests: ${stats.totalRequests}\n` +
367
- ` Cost: $${stats.totalCostUsd.toFixed(4)} USDC\n` +
368
- ` Saved: $${saved.toFixed(2)} vs Claude Opus\n` +
369
- ` Tokens: ${stats.totalInputTokens.toLocaleString()} in / ${stats.totalOutputTokens.toLocaleString()} out\n`
370
- });
380
+ const { getSessionModelBreakdown } = await import('../stats/session-tracker.js');
381
+ const { formatTokens, formatUsd, shortModelName } = await import('../stats/format.js');
382
+ const breakdown = getSessionModelBreakdown();
383
+ let text = `**Session Cost**\n` +
384
+ ` Requests: ${stats.totalRequests}\n` +
385
+ ` Cost: $${stats.totalCostUsd.toFixed(4)} USDC\n` +
386
+ ` Saved: $${saved.toFixed(2)} vs Claude Opus\n` +
387
+ ` Tokens: ${formatTokens(stats.totalInputTokens)} in / ${formatTokens(stats.totalOutputTokens)} out\n`;
388
+ if (breakdown.length > 0) {
389
+ text += `\n **By model:**\n`;
390
+ for (const m of breakdown) {
391
+ const name = shortModelName(m.model).padEnd(28);
392
+ const cost = formatUsd(m.costUsd).padStart(8);
393
+ const reqs = `${m.requests} req`.padStart(6);
394
+ const tier = m.lastTier ? ` ${m.lastTier}` : '';
395
+ text += ` ${name} ${cost} ${reqs}${tier}\n`;
396
+ }
397
+ }
398
+ ctx.onEvent({ kind: 'text_delta', text });
371
399
  emitDone(ctx);
372
400
  },
373
401
  '/wallet': async (ctx) => {
@@ -419,6 +447,38 @@ const DIRECT_COMMANDS = {
419
447
  ctx.onEvent({ kind: 'text_delta', text: 'Conversation history cleared.\n' });
420
448
  emitDone(ctx);
421
449
  },
450
+ '/failures': async (ctx) => {
451
+ const { getFailureStats } = await import('../stats/failures.js');
452
+ const stats = getFailureStats();
453
+ if (stats.total === 0) {
454
+ ctx.onEvent({ kind: 'text_delta', text: 'No failures recorded.\n' });
455
+ emitDone(ctx);
456
+ return;
457
+ }
458
+ let text = `**Failure Log** (${stats.total} total)\n\n`;
459
+ if (stats.byType.size > 0) {
460
+ text += ' **By type:**\n';
461
+ for (const [type, count] of [...stats.byType.entries()].sort((a, b) => b[1] - a[1])) {
462
+ text += ` ${type.padEnd(20)} ${count}\n`;
463
+ }
464
+ }
465
+ if (stats.byTool.size > 0) {
466
+ text += '\n **By tool:**\n';
467
+ for (const [tool, count] of [...stats.byTool.entries()].sort((a, b) => b[1] - a[1])) {
468
+ text += ` ${tool.padEnd(20)} ${count}\n`;
469
+ }
470
+ }
471
+ if (stats.recentFailures.length > 0) {
472
+ text += '\n **Recent:**\n';
473
+ for (const f of stats.recentFailures.slice(-5)) {
474
+ const date = new Date(f.timestamp).toLocaleDateString();
475
+ const tool = f.toolName ? ` ${f.toolName}:` : '';
476
+ text += ` [${date}]${tool} ${f.errorMessage.slice(0, 80)}\n`;
477
+ }
478
+ }
479
+ ctx.onEvent({ kind: 'text_delta', text });
480
+ emitDone(ctx);
481
+ },
422
482
  '/compact': async (ctx) => {
423
483
  const beforeTokens = estimateHistoryTokens(ctx.history);
424
484
  const { history: compacted, compacted: didCompact } = await forceCompact(ctx.history, ctx.config.model, ctx.client, ctx.config.debug);
@@ -546,6 +606,57 @@ export async function handleSlashCommand(input, ctx) {
546
606
  emitDone(ctx);
547
607
  return { handled: true };
548
608
  }
609
+ // /brain — view knowledge graph entities
610
+ if (input === '/brain' || input.startsWith('/brain ')) {
611
+ const { searchEntities, loadEntities, getEntityObservations, getEntityRelations, getBrainStats, loadObservations } = await import('../brain/store.js');
612
+ const arg = input.slice('/brain'.length).trim();
613
+ if (!arg) {
614
+ const stats = getBrainStats();
615
+ if (stats.entities === 0) {
616
+ ctx.onEvent({ kind: 'text_delta', text: 'Brain is empty. Franklin learns entities (people, projects, companies) from your conversations over time.\n' });
617
+ }
618
+ else {
619
+ const entities = loadEntities().sort((a, b) => b.reference_count - a.reference_count);
620
+ let text = `**Franklin Brain** (${stats.entities} entities, ${stats.observations} facts, ${stats.relations} relations)\n\n`;
621
+ for (const e of entities.slice(0, 20)) {
622
+ text += ` ${e.type === 'person' ? '👤' : e.type === 'company' ? '🏢' : e.type === 'project' ? '📦' : '💡'} **${e.name}** (${e.type}, ×${e.reference_count})\n`;
623
+ }
624
+ if (entities.length > 20)
625
+ text += ` ... and ${entities.length - 20} more\n`;
626
+ text += '\nSearch: `/brain <name>` for details.\n';
627
+ ctx.onEvent({ kind: 'text_delta', text });
628
+ }
629
+ }
630
+ else {
631
+ const results = searchEntities(arg, 5);
632
+ if (results.length === 0) {
633
+ ctx.onEvent({ kind: 'text_delta', text: `No entities matching "${arg}".\n` });
634
+ }
635
+ else {
636
+ let text = '';
637
+ for (const e of results) {
638
+ text += `**${e.name}** (${e.type})\n`;
639
+ if (e.aliases.length > 0)
640
+ text += ` Aliases: ${e.aliases.join(', ')}\n`;
641
+ const obs = getEntityObservations(e.id).slice(0, 5);
642
+ for (const o of obs) {
643
+ text += ` - ${o.content}\n`;
644
+ }
645
+ const rels = getEntityRelations(e.id);
646
+ const allEntities = loadEntities();
647
+ for (const r of rels.slice(0, 3)) {
648
+ const other = allEntities.find(x => x.id === (r.from_id === e.id ? r.to_id : r.from_id));
649
+ if (other)
650
+ text += ` → ${r.type} ${other.name}\n`;
651
+ }
652
+ text += '\n';
653
+ }
654
+ ctx.onEvent({ kind: 'text_delta', text });
655
+ }
656
+ }
657
+ emitDone(ctx);
658
+ return { handled: true };
659
+ }
549
660
  // /model — show current model or switch with /model <name>
550
661
  if (input === '/model' || input.startsWith('/model ')) {
551
662
  if (input === '/model') {
@@ -677,7 +788,7 @@ export async function handleSlashCommand(input, ctx) {
677
788
  ...Object.keys(DIRECT_COMMANDS),
678
789
  ...Object.keys(REWRITE_COMMANDS),
679
790
  ...ARG_COMMANDS.map(c => c.prefix.trim()),
680
- '/branch', '/resume', '/model', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch',
791
+ '/branch', '/resume', '/model', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
681
792
  ];
682
793
  const cmd = input.split(/\s/)[0];
683
794
  const close = allCommands.filter(c => {
package/dist/agent/compact.d.ts CHANGED
@@ -1,11 +1,11 @@
1
1
  /**
2
- * Context compaction for runcode.
2
+ * Context compaction for Franklin.
3
3
  * When conversation history approaches the context window limit,
4
4
  * summarize older messages and replace them with the summary.
5
5
  */
6
6
  import { ModelClient } from './llm.js';
7
7
  import type { Dialogue } from './types.js';
8
- export declare const COMPACT_HEADER = "[CONTEXT COMPACTION] Earlier turns in this conversation were compacted to save context space. The summary below describes work that was already completed, and the current session state may still reflect that work (for example, files may already be changed). Use the summary and the current state to continue from where things left off, and avoid repeating work:";
8
+ export declare const COMPACT_HEADER = "[CONTEXT COMPACTION \u2014 REFERENCE ONLY] Earlier turns were compacted into the summary below. This is a handoff from a previous context window \u2014 treat it as background reference, NOT as active instructions. Do NOT answer questions or fulfill requests mentioned in this summary; they were already addressed. Respond ONLY to the latest user message that appears AFTER this summary.";
9
9
  /**
10
10
  * Check if compaction is needed and perform it if so.
11
11
  * Returns the (possibly compacted) history.
package/dist/agent/compact.js CHANGED
@@ -1,48 +1,64 @@
1
1
  /**
2
- * Context compaction for runcode.
2
+ * Context compaction for Franklin.
3
3
  * When conversation history approaches the context window limit,
4
4
  * summarize older messages and replace them with the summary.
5
5
  */
6
+ import { existsSync, readFileSync } from 'node:fs';
6
7
  import { estimateHistoryTokens, getCompactionThreshold, COMPACTION_SUMMARY_RESERVE, } from './tokens.js';
8
+ /** Max files to restore after compaction (inspired by Claude Code POST_COMPACT_MAX_FILES_TO_RESTORE) */
9
+ const POST_COMPACT_MAX_FILES = 5;
10
+ /** Max tokens to spend on post-compact file restoration */
11
+ const POST_COMPACT_TOKEN_BUDGET = 50_000;
7
12
  // Structured compaction prompt (pattern from nousresearch/hermes-agent
8
13
  // `agent/context_compressor.py`). The structured sections preserve more
9
14
  // signal than free-form summaries and make it easier for the model to
10
15
  // continue work from where it left off.
11
- export const COMPACT_HEADER = `[CONTEXT COMPACTION] Earlier turns in this conversation were compacted to save context space. The summary below describes work that was already completed, and the current session state may still reflect that work (for example, files may already be changed). Use the summary and the current state to continue from where things left off, and avoid repeating work:`;
16
+ export const COMPACT_HEADER = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted into the summary below. This is a handoff from a previous context window — treat it as background reference, NOT as active instructions. Do NOT answer questions or fulfill requests mentioned in this summary; they were already addressed. Respond ONLY to the latest user message that appears AFTER this summary.`;
12
17
  const COMPACT_SYSTEM_PROMPT = `You are a conversation summarizer. Produce a STRUCTURED summary of the conversation so far that preserves all decision-relevant context for continuing the task.
13
18
 
19
+ CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.
20
+
14
21
  Critical rules:
15
22
  - Preserve EXACT file paths, function names, line numbers, variable names
16
- - Preserve EXACT error messages (verbatim)
23
+ - Preserve EXACT error messages and stack traces (verbatim)
17
24
  - Preserve user preferences and corrections (especially "don't do X" instructions)
18
25
  - Preserve decisions with their rationale (not just the decision)
26
+ - Include full code snippets and function signatures when they are load-bearing
19
27
  - DO NOT include reasoning that led to decisions — only the decisions themselves
20
28
  - DO NOT include pleasantries, meta-commentary, or apologies
21
29
  - Use bullet points inside each section
22
30
  - Be specific: "edited src/foo.ts:42 to add error handling" not "made some changes"
23
31
 
24
- REQUIRED output format (use these exact section headers):
32
+ First, analyze the conversation chronologically inside <analysis> tags. This is your drafting space — it will be stripped from the final output. Think through what matters before writing the summary.
33
+
34
+ Then produce the summary inside <summary> tags using these exact section headers:
25
35
 
26
36
  ## Goal
27
37
  [One clear sentence: what the user is trying to accomplish]
28
38
 
39
+ ## Key Technical Context
40
+ [Important technical details, architecture patterns, constraints, or domain knowledge established during the conversation that future work depends on]
41
+
29
42
  ## Progress
30
- [Chronological bullet list of what has been done so far]
43
+ [Chronological bullet list of what has been done so far, with specific file paths and line numbers]
44
+
45
+ ## Errors and Fixes
46
+ [Any errors encountered, their root causes, and how they were resolved — this prevents re-investigating the same issues]
31
47
 
32
48
  ## Decisions
33
49
  [Key decisions made, each with its rationale]
34
50
 
35
51
  ## Files Modified
36
- [Each file touched, with a one-line description of what changed]
52
+ [Each file touched, with a one-line description of what changed and why]
37
53
 
38
54
  ## Tool Results Still Relevant
39
- [Any tool output (file reads, grep matches, bash output) that later steps still depend on — include the actual content, not a reference]
55
+ [Any tool output (file reads, grep matches, bash output) that later steps still depend on — include the actual content, not just a reference to it]
40
56
 
41
- ## User Preferences & Corrections
42
- [Anything the user explicitly asked for or corrected — these are load-bearing]
57
+ ## User Messages and Feedback
58
+ [Chronological summary of what the user said, asked for, and corrected — these are load-bearing and must not be lost]
43
59
 
44
60
  ## Next Steps
45
- [What comes next, in priority order]
61
+ [What comes next, in priority order, with enough detail to continue without re-reading the original conversation]
46
62
 
47
63
  If there's an existing [CONTEXT COMPACTION] summary in the messages being compacted, MERGE its content into your output rather than nesting. Do not produce a summary of a summary.`;
48
64
  /**
@@ -56,7 +72,7 @@ export async function autoCompactIfNeeded(history, model, client, debug) {
56
72
  return { history, compacted: false };
57
73
  }
58
74
  if (debug) {
59
- console.error(`[runcode] Auto-compacting: ~${currentTokens} tokens, threshold=${threshold}`);
75
+ console.error(`[franklin] Auto-compacting: ~${currentTokens} tokens, threshold=${threshold}`);
60
76
  }
61
77
  const beforeTokens = estimateHistoryTokens(history);
62
78
  try {
@@ -64,7 +80,7 @@ export async function autoCompactIfNeeded(history, model, client, debug) {
64
80
  const afterTokens = estimateHistoryTokens(compacted);
65
81
  if (afterTokens >= beforeTokens) {
66
82
  if (debug) {
67
- console.error(`[runcode] Auto-compaction grew history (${beforeTokens} → ${afterTokens}) — skipping`);
83
+ console.error(`[franklin] Auto-compaction grew history (${beforeTokens} → ${afterTokens}) — skipping`);
68
84
  }
69
85
  return { history, compacted: false };
70
86
  }
@@ -72,7 +88,7 @@ export async function autoCompactIfNeeded(history, model, client, debug) {
72
88
  }
73
89
  catch (err) {
74
90
  if (debug) {
75
- console.error(`[runcode] Compaction failed: ${err.message}`);
91
+ console.error(`[franklin] Compaction failed: ${err.message}`);
76
92
  }
77
93
  // Fallback: truncate oldest messages instead of crashing
78
94
  const truncated = emergencyTruncate(history, threshold);
@@ -93,7 +109,7 @@ export async function forceCompact(history, model, client, debug) {
93
109
  // Only accept compaction if it actually reduces tokens
94
110
  if (afterTokens >= beforeTokens) {
95
111
  if (debug) {
96
- console.error(`[runcode] Compaction produced larger history (${beforeTokens} → ${afterTokens}) — reverting`);
112
+ console.error(`[franklin] Compaction produced larger history (${beforeTokens} → ${afterTokens}) — reverting`);
97
113
  }
98
114
  return { history, compacted: false };
99
115
  }
@@ -101,7 +117,7 @@ export async function forceCompact(history, model, client, debug) {
101
117
  }
102
118
  catch (err) {
103
119
  if (debug) {
104
- console.error(`[runcode] Force compaction failed: ${err.message}`);
120
+ console.error(`[franklin] Force compaction failed: ${err.message}`);
105
121
  }
106
122
  const threshold = getCompactionThreshold(model);
107
123
  const truncated = emergencyTruncate(history, threshold);
@@ -124,7 +140,7 @@ async function compactHistory(history, model, client, debug) {
124
140
  return history;
125
141
  }
126
142
  if (debug) {
127
- console.error(`[runcode] Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`);
143
+ console.error(`[franklin] Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`);
128
144
  }
129
145
  // Build summary request
130
146
  const summaryMessages = [
@@ -140,16 +156,17 @@ async function compactHistory(history, model, client, debug) {
140
156
  max_tokens: COMPACTION_SUMMARY_RESERVE,
141
157
  stream: true,
142
158
  });
143
- // Extract summary text
144
- let summaryText = '';
159
+ // Extract summary text and strip analysis scratchpad
160
+ let rawSummary = '';
145
161
  for (const part of summaryParts) {
146
162
  if (part.type === 'text') {
147
- summaryText += part.text;
163
+ rawSummary += part.text;
148
164
  }
149
165
  }
150
- if (!summaryText) {
166
+ if (!rawSummary) {
151
167
  throw new Error('Empty summary returned from model');
152
168
  }
169
+ const summaryText = formatCompactSummary(rawSummary);
153
170
  // Build compacted history: summary as first message, then kept messages.
154
171
  // The COMPACT_HEADER prefix lets future compactions detect and merge rather
155
172
  // than nest summaries.
@@ -162,14 +179,107 @@ async function compactHistory(history, model, client, debug) {
162
179
  role: 'assistant',
163
180
  content: 'Got it. I have the structured context from earlier work and will continue from where things left off.',
164
181
  },
165
- ...toKeep,
166
182
  ];
183
+ // Post-compact file restoration (inspired by Claude Code)
184
+ // Re-read recently modified files to restore working context that was lost
185
+ // during compaction. This prevents the agent from needing to re-read files
186
+ // it was actively working on.
187
+ const restoredFiles = restoreRecentFiles(summaryText, toSummarize, debug);
188
+ if (restoredFiles) {
189
+ compacted.push({ role: 'user', content: restoredFiles.prompt }, { role: 'assistant', content: 'I have the restored file contents and will use them as context for continuing work.' });
190
+ }
191
+ compacted.push(...toKeep);
167
192
  if (debug) {
168
193
  const newTokens = estimateHistoryTokens(compacted);
169
- console.error(`[runcode] Compacted: ${estimateHistoryTokens(history)} → ${newTokens} tokens`);
194
+ console.error(`[franklin] Compacted: ${estimateHistoryTokens(history)} → ${newTokens} tokens`);
170
195
  }
171
196
  return compacted;
172
197
  }
198
/**
 * Restore recently modified files after compaction.
 *
 * Gathers candidate paths from two sources, reads the ones that can still be
 * read, and builds a single context-restoration prompt:
 *   1. `file_path` inputs of Edit/Write tool_use parts in the messages that
 *      were summarized away (absolute paths only). These are ordered so the
 *      most recently touched file comes first.
 *   2. Path-like tokens found in the "## Files Modified" section of the
 *      summary. These are regex guesses, so they rank after the tool paths.
 *
 * At most POST_COMPACT_MAX_FILES files are restored, and the combined size is
 * bounded by POST_COMPACT_TOKEN_BUDGET (estimated tokens). Paths that cannot
 * be read (missing, a directory, permission denied) are skipped and do not
 * count against the file limit.
 *
 * Inspired by Claude Code's POST_COMPACT_MAX_FILES_TO_RESTORE mechanism.
 *
 * @param summaryText       Compaction summary text to scan for file paths.
 * @param compactedMessages Messages that were replaced by the summary.
 * @param debug             When truthy, logs the number of restored files to stderr.
 * @returns `{ prompt }` holding the restoration message, or `null` when no
 *          file could be restored.
 */
function restoreRecentFiles(summaryText, compactedMessages, debug) {
    // Source 1: Edit/Write tool calls, walked oldest → newest. Deleting before
    // re-adding moves a path that was touched again to the end, so the Set's
    // iteration order ends up being "order of last touch".
    const touched = new Set();
    for (const msg of compactedMessages) {
        if (msg.role !== 'assistant' || !Array.isArray(msg.content))
            continue;
        for (const part of msg.content) {
            if (part.type !== 'tool_use' || (part.name !== 'Edit' && part.name !== 'Write'))
                continue;
            const fp = part.input?.file_path;
            if (typeof fp === 'string' && fp.startsWith('/')) {
                touched.delete(fp);
                touched.add(fp);
            }
        }
    }
    // Most recently touched first.
    const candidates = new Set([...touched].reverse());
    // Source 2: "## Files Modified" section of the summary. The pattern already
    // requires a dot-extension, so no further "looks like a path" filter is
    // needed; tokens that are not real files simply fail to read below.
    const filesSection = summaryText.match(/## Files Modified\n([\s\S]*?)(?=\n## |$)/);
    if (filesSection) {
        const pathRegex = /[`"]?([/\w.-]+\.\w{1,10})[`"]?/g;
        let match;
        while ((match = pathRegex.exec(filesSection[1])) !== null) {
            candidates.add(match[1]);
        }
    }
    if (candidates.size === 0)
        return null;
    // Read files within the token budget, in priority order.
    const restoredParts = [];
    let tokenBudget = POST_COMPACT_TOKEN_BUDGET;
    for (const fp of candidates) {
        if (restoredParts.length >= POST_COMPACT_MAX_FILES)
            break;
        let content;
        try {
            content = readFileSync(fp, 'utf-8');
        }
        catch {
            // Missing, unreadable, or a directory — skip without using up a slot.
            continue;
        }
        const estimatedTokens = Math.ceil(content.length / 4 * 1.33);
        if (estimatedTokens > tokenBudget) {
            // File too large for remaining budget — take first chunk
            const maxChars = Math.floor(tokenBudget * 3); // ~3 chars per token
            if (maxChars > 500) {
                const truncated = content.slice(0, maxChars);
                restoredParts.push(`### ${fp}\n\`\`\`\n${truncated}\n... (truncated)\n\`\`\``);
            }
            // Budget is exhausted either way; nothing further can be added.
            break;
        }
        restoredParts.push(`### ${fp}\n\`\`\`\n${content}\n\`\`\``);
        tokenBudget -= estimatedTokens;
    }
    if (restoredParts.length === 0)
        return null;
    if (debug) {
        console.error(`[franklin] Post-compact: restored ${restoredParts.length} files`);
    }
    return {
        prompt: `[POST-COMPACT FILE RESTORATION] The following files were being actively worked on before context compaction. Their current contents are provided to restore working context:\n\n${restoredParts.join('\n\n')}`,
    };
}
173
283
  /**
174
284
  * Find how many recent messages to keep (don't summarize).
175
285
  * Keeps the most recent tool exchange + the last few user/assistant turns.
@@ -239,6 +349,22 @@ function formatForSummarization(messages) {
239
349
  }
240
350
  return parts.join('\n\n');
241
351
  }
352
/**
 * Strip the analysis scratchpad from compaction output and extract the summary.
 * The model drafts in <analysis> tags (for quality), then writes the final
 * summary in <summary> tags. We keep only the summary.
 *
 * The summary request is capped by max_tokens, so the output may be cut off
 * before the closing </summary> tag. In that case everything after the
 * opening <summary> tag is used, rather than leaking the literal tag into the
 * stored summary.
 *
 * @param raw Raw text returned by the summarization model.
 * @returns The trimmed summary body; if stripping leaves nothing, the trimmed
 *          raw text is returned unchanged.
 */
function formatCompactSummary(raw) {
    // Strip <analysis>...</analysis> (the drafting scratchpad)
    let cleaned = raw.replace(/<analysis>[\s\S]*?<\/analysis>/gi, '').trim();
    // Extract content from <summary>...</summary> if present. The closing tag
    // is optional: `$` (no `m` flag) matches only at end of input, so a
    // truncated summary runs to the end of the text.
    const summaryMatch = cleaned.match(/<summary>([\s\S]*?)(?:<\/summary>|$)/i);
    if (summaryMatch) {
        cleaned = summaryMatch[1].trim();
    }
    // If neither tag was used, the model gave us raw output — use as-is
    return cleaned || raw.trim();
}
242
368
  /**
243
369
  * Pick a cheaper/faster model for compaction to save cost.
244
370
  */
@@ -1,11 +1,16 @@
1
1
  /**
2
- * Context Manager for runcode
2
+ * Context Manager for Franklin
3
3
  * Assembles system instructions, reads project config, injects environment info.
4
4
  */
5
5
  /**
6
6
  * Build the full system instructions array for a session.
7
7
  * Result is memoized per workingDir for the process lifetime.
8
8
  */
9
- export declare function assembleInstructions(workingDir: string): string[];
9
+ export declare function assembleInstructions(workingDir: string, model?: string): string[];
10
+ /**
11
+ * Model-family-specific execution guidance.
12
+ * Weak models get strict guardrails. Strong models get quality standards.
13
+ */
14
+ export declare function getModelGuidance(model: string): string;
10
15
  /** Invalidate cache for a workingDir (call after /clear or session reset). */
11
- export declare function invalidateInstructionCache(workingDir: string): void;
16
+ export declare function invalidateInstructionCache(workingDir?: string): void;