@blockrun/franklin 3.6.4 → 3.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -202,7 +202,7 @@ const DIRECT_COMMANDS = {
202
202
  ` **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
203
203
  ` **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
204
204
  ` **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /session-search /context /tasks\n` +
205
- ` **Power:** /ultrathink [query] /ultraplan /noplan /dump\n` +
205
+ ` **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
206
206
  ` **Info:** /model /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
207
207
  ` **UI:** /clear /exit\n` +
208
208
  (ultrathinkOn ? `\n Ultrathink: ON\n` : '')
@@ -536,6 +536,8 @@ const ARG_COMMANDS = [
536
536
  { prefix: '/refactor ', rewrite: (a) => `Refactor: ${a}. Read the relevant code first, then make targeted changes. Explain each change.` },
537
537
  { prefix: '/scaffold ', rewrite: (a) => `Create the scaffolding/boilerplate for: ${a}. Generate the file structure and initial code. Ask me if you need clarification on requirements.` },
538
538
  { prefix: '/doc ', rewrite: (a) => `Generate documentation for ${a}. Include: purpose, API/interface description, usage examples, and important notes.` },
539
+ { prefix: '/moa ', rewrite: (a) => `Use the MixtureOfAgents tool to get a high-quality answer by querying multiple AI models in parallel: ${a}` },
540
+ { prefix: '/moa', rewrite: () => `Use the MixtureOfAgents tool. Ask me what question I want answered by multiple models.` },
539
541
  ];
540
542
  // ─── Main dispatch ────────────────────────────────────────────────────────
541
543
  /**
@@ -667,6 +669,7 @@ export async function handleSlashCommand(input, ctx) {
667
669
  else {
668
670
  const newModel = resolveModel(input.slice(7).trim());
669
671
  ctx.config.model = newModel;
672
+ ctx.config.baseModel = newModel; // Update recovery target so loop doesn't reset
670
673
  ctx.config.onModelChange?.(newModel);
671
674
  ctx.onEvent({ kind: 'text_delta', text: `Model → **${newModel}**\n` });
672
675
  }
@@ -5,7 +5,7 @@
5
5
  import fs from 'node:fs';
6
6
  import path from 'node:path';
7
7
  import { execSync } from 'node:child_process';
8
- import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt } from '../learnings/store.js';
8
+ import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt, loadSkills, formatSkillsForPrompt } from '../learnings/store.js';
9
9
  // ─── System Instructions Assembly ──────────────────────────────────────────
10
10
  // Composable prompt sections — each independently maintainable and conditionally includable.
11
11
  function getCoreInstructions() {
@@ -186,10 +186,16 @@ export function assembleInstructions(workingDir, model) {
186
186
  getTokenEfficiencySection(),
187
187
  getVerificationSection(),
188
188
  ];
189
- // Read RUNCODE.md or CLAUDE.md from the project
189
+ // Read RUNCODE.md or CLAUDE.md from the project (with injection scanning)
190
190
  const projectConfig = readProjectConfig(workingDir);
191
191
  if (projectConfig) {
192
- parts.push(`# Project Instructions\n\n${projectConfig}`);
192
+ const { sanitized, threats } = scanForInjection(projectConfig);
193
+ if (threats.length > 0) {
194
+ parts.push(`# Project Instructions\n\n⚠️ WARNING: ${threats.length} suspicious pattern(s) detected in project config and neutralized.\n\n${sanitized}`);
195
+ }
196
+ else {
197
+ parts.push(`# Project Instructions\n\n${projectConfig}`);
198
+ }
193
199
  }
194
200
  // Inject environment info
195
201
  parts.push(buildEnvironmentSection(workingDir));
@@ -210,6 +216,18 @@ export function assembleInstructions(workingDir, model) {
210
216
  }
211
217
  }
212
218
  catch { /* learnings are optional — never block startup */ }
219
+ // Inject relevant skills (procedural memory from past complex tasks)
220
+ try {
221
+ const allSkills = loadSkills();
222
+ if (allSkills.length > 0) {
223
+ // Skills are matched lazily on first user message — for now inject top skills by use count
224
+ const topSkills = allSkills.sort((a, b) => b.uses - a.uses).slice(0, 5);
225
+ const skillsSection = formatSkillsForPrompt(topSkills);
226
+ if (skillsSection)
227
+ parts.push(skillsSection);
228
+ }
229
+ }
230
+ catch { /* skills are optional */ }
213
231
  // Model-specific execution guidance
214
232
  if (model) {
215
233
  parts.push(getModelGuidance(model));
@@ -276,6 +294,52 @@ export function invalidateInstructionCache(workingDir) {
276
294
  _instructionCache.clear();
277
295
  }
278
296
  }
297
// ─── Prompt Injection Detection ────────────────────────────────────────────
/**
 * Patterns that indicate potential prompt injection in context files.
 * Each entry pairs a detection regex with a short label used in threat reports.
 */
const INJECTION_PATTERNS = [
    // Direct instruction override attempts
    { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, description: 'instruction override' },
    { pattern: /disregard\s+(all\s+)?(previous\s+|above\s+)?rules/i, description: 'rule disregard' },
    { pattern: /forget\s+(everything|all|your)\s+(you|instructions|rules)/i, description: 'memory wipe' },
    { pattern: /you\s+are\s+now\s+(?:a\s+)?(?:different|new|unrestricted)/i, description: 'identity hijack' },
    { pattern: /system\s*:\s*you\s+are/i, description: 'fake system message' },
    // Dangerous command injection
    { pattern: /execute\s+(curl|wget|bash|sh|python|node)\b/i, description: 'command execution' },
    { pattern: /\bcat\s+\/etc\/(passwd|shadow|sudoers)/i, description: 'credential access' },
    { pattern: /\brm\s+-rf\s+[\/~]/i, description: 'destructive command' },
    { pattern: /\beval\s*\(/i, description: 'eval injection' },
    // Data exfiltration
    { pattern: /\bcurl\s+.*\|\s*(bash|sh)/i, description: 'pipe to shell' },
    { pattern: /send\s+(to|via)\s+(http|webhook|url)/i, description: 'data exfiltration' },
    // HTML/comment injection
    { pattern: /<!--[\s\S]*?-->/g, description: 'HTML comment injection' },
];
/** Invisible unicode characters that can hide malicious content. */
const INVISIBLE_UNICODE = /[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF\u00AD]/g;
/**
 * Scan text for prompt injection patterns and invisible unicode.
 *
 * Invisible characters are stripped first so they cannot be used to split a
 * phrase and dodge the pattern checks. Every occurrence of every matching
 * pattern is then wrapped as `[BLOCKED: …]` (visible but defanged).
 *
 * @param {string} text - Raw context-file content.
 * @returns {{ sanitized: string, threats: string[] }} The neutralized text and
 *   one human-readable entry per detection category (quoting the first match,
 *   truncated to 50 characters).
 */
function scanForInjection(text) {
    const threats = [];
    let sanitized = text;
    // match() with a /g regex always scans from index 0, so unlike test() it
    // cannot be thrown off by (or leave behind) a stale lastIndex.
    const invisible = sanitized.match(INVISIBLE_UNICODE);
    if (invisible) {
        threats.push(`${invisible.length} invisible unicode character(s) removed`);
        sanitized = sanitized.replace(INVISIBLE_UNICODE, '');
    }
    for (const { pattern, description } of INJECTION_PATTERNS) {
        const matches = sanitized.match(pattern);
        if (!matches) {
            continue;
        }
        threats.push(`${description}: "${matches[0].slice(0, 50)}"`);
        // replace() with a non-global regex only rewrites the first hit, which
        // would leave repeated payloads live — always neutralize globally.
        const everyOccurrence = pattern.global
            ? pattern
            : new RegExp(pattern.source, `${pattern.flags}g`);
        sanitized = sanitized.replace(everyOccurrence, (match) => `[BLOCKED: ${match}]`);
    }
    return { sanitized, threats };
}
279
343
  // ─── Project Config ────────────────────────────────────────────────────────
280
344
  /**
281
345
  * Look for RUNCODE.md, then CLAUDE.md in the working directory and parents.
@@ -19,6 +19,7 @@ import { maybeMidSessionExtract } from '../learnings/extractor.js';
19
19
  import { routeRequest, parseRoutingProfile } from '../router/index.js';
20
20
  import { recordOutcome } from '../router/local-elo.js';
21
21
  import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
22
+ import { shouldVerify, runVerification } from './verification.js';
22
23
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
23
24
  /**
24
25
  * Atomically replace all elements in a history array.
@@ -218,7 +219,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
218
219
  const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
219
220
  const history = [];
220
221
  let lastUserInput = ''; // For /retry
221
- const originalModel = config.model; // Preserve original model/routing profile for recovery
222
+ config.baseModel = config.model; // User's intended model/model command updates this
222
223
  let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
223
224
  // Track models that failed with 402 (payment required) across turns.
224
225
  // These persist until the session ends — unlike transient errors, payment failures
@@ -294,9 +295,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
294
295
  // ── Model recovery: try original model at the start of each new turn ──
295
296
  // If we fell back to a free model last turn due to a transient error, try original again.
296
297
  // But DON'T reset if the original model had a payment failure — it will just fail again.
297
- if (config.model !== originalModel && !paymentFailedModels.has(originalModel)) {
298
- config.model = originalModel;
299
- config.onModelChange?.(originalModel);
298
+ const baseModel = config.baseModel ?? config.model;
299
+ if (config.model !== baseModel && !paymentFailedModels.has(baseModel)) {
300
+ config.model = baseModel;
301
+ config.onModelChange?.(baseModel);
300
302
  }
301
303
  turnFailedModels = new Set(); // Fresh slate for transient failures this turn
302
304
  const abort = new AbortController();
@@ -714,6 +716,35 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
714
716
  });
715
717
  }
716
718
  }
719
+ // ── Verification gate: run adversarial checks on substantial work ──
720
+ if (shouldVerify(turnToolCalls, turnToolCounts, lastUserInput || '')) {
721
+ try {
722
+ const vResult = await runVerification(history, capabilityMap, client, {
723
+ model: config.model,
724
+ workDir,
725
+ abortSignal: abort.signal,
726
+ onEvent: (e) => { if (e.kind === 'text_delta' && e.text)
727
+ onEvent({ kind: 'text_delta', text: e.text }); },
728
+ });
729
+ if (vResult.verdict === 'FAIL' && vResult.issues.length > 0) {
730
+ // Inject verification feedback — agent will see this and continue fixing
731
+ const feedbackMsg = {
732
+ role: 'user',
733
+ content: `[VERIFICATION FAILED]\n${vResult.summary}\n\nFix the issues above and verify your fixes work.`,
734
+ };
735
+ history.push(feedbackMsg);
736
+ persistSessionMessage(feedbackMsg);
737
+ onEvent({ kind: 'text_delta', text: `\n⚠️ *Verification found issues — fixing...*\n` });
738
+ continue; // Re-enter the loop to fix issues
739
+ }
740
+ if (vResult.verdict === 'PASS') {
741
+ onEvent({ kind: 'text_delta', text: '\n✓ *Verified*\n' });
742
+ }
743
+ }
744
+ catch {
745
+ // Verification errors never block the main flow
746
+ }
747
+ }
717
748
  // Record success for local Elo learning (include tool call count for efficiency)
718
749
  if (lastRoutedCategory && lastRoutedModel) {
719
750
  recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
@@ -142,4 +142,6 @@ export interface AgentConfig {
142
142
  onAskUser?: (question: string, options?: string[]) => Promise<string>;
143
143
  /** Notify UI when agent switches model (e.g. payment fallback) */
144
144
  onModelChange?: (model: string) => void;
145
+ /** The user's intended model — updated by /model command, used for turn recovery */
146
+ baseModel?: string;
145
147
  }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Verification Agent — adversarial testing gate.
3
+ *
4
+ * After the main agent completes substantial work (writes/edits files, runs commands),
5
+ * this agent runs independently to try to BREAK what was built. It can only read and
6
+ * execute — never modify files. Returns PASS/FAIL/PARTIAL verdict.
7
+ *
8
+ * If FAIL: injects feedback into conversation so the main agent can fix issues.
9
+ * If PASS: work is considered verified.
10
+ *
11
+ * Inspired by Claude Code's verification agent architecture.
12
+ */
13
+ import type { CapabilityHandler, Dialogue } from './types.js';
14
+ import { ModelClient } from './llm.js';
15
+ export interface VerificationResult {
16
+ verdict: 'PASS' | 'FAIL' | 'PARTIAL' | 'SKIPPED';
17
+ summary: string;
18
+ issues: string[];
19
+ }
20
+ /**
21
+ * Should we run verification for this turn?
22
+ * Only for substantial work: 3+ tool calls AND at least one write/edit/bash.
23
+ */
24
+ export declare function shouldVerify(turnToolCalls: number, turnToolCounts: Map<string, number>, userInput: string): boolean;
25
+ /**
26
+ * Filter capability handlers to only allow read-only tools.
27
+ * Bash is allowed (for running tests/builds) but Edit/Write are blocked.
28
+ */
29
+ export declare function getVerificationTools(handlers: Map<string, CapabilityHandler>): Map<string, CapabilityHandler>;
30
+ /**
31
+ * Run the verification agent on the current conversation state.
32
+ * Uses a cheap model to minimize cost. Returns verdict + issues.
33
+ */
34
+ export declare function runVerification(history: Dialogue[], handlers: Map<string, CapabilityHandler>, client: ModelClient, config: {
35
+ model: string;
36
+ workDir: string;
37
+ abortSignal: AbortSignal;
38
+ onEvent?: (event: {
39
+ kind: string;
40
+ text?: string;
41
+ }) => void;
42
+ }): Promise<VerificationResult>;
@@ -0,0 +1,206 @@
1
+ /**
2
+ * Verification Agent — adversarial testing gate.
3
+ *
4
+ * After the main agent completes substantial work (writes/edits files, runs commands),
5
+ * this agent runs independently to try to BREAK what was built. It can only read and
6
+ * execute — never modify files. Returns PASS/FAIL/PARTIAL verdict.
7
+ *
8
+ * If FAIL: injects feedback into conversation so the main agent can fix issues.
9
+ * If PASS: work is considered verified.
10
+ *
11
+ * Inspired by Claude Code's verification agent architecture.
12
+ */
13
+ // ─── Verification System Prompt ───────────────────────────────────────────
14
+ const VERIFICATION_PROMPT = `You are a VERIFICATION agent. Your job is NOT to confirm that code works — it is to TRY TO BREAK IT.
15
+
16
+ ## Rules
17
+
18
+ 1. **Adversarial mindset**: Assume the code has bugs. Your goal is to find them.
19
+ 2. **No modifications**: You may ONLY use Read, Bash, Glob, and Grep tools. You MUST NOT use Edit, Write, or any tool that modifies files.
20
+ 3. **Evidence required**: Every check MUST include:
21
+ - What you tested (the exact command or operation)
22
+ - The actual output
23
+ - Whether it PASSED or FAILED
24
+ 4. **No rationalization**: These phrases are NEVER acceptable as evidence:
25
+ - "The code looks correct"
26
+ - "This should work"
27
+ - "Based on the implementation, it handles..."
28
+ - "The tests pass" (unless you actually ran them and showed output)
29
+
30
+ ## What to Check
31
+
32
+ 1. **Does it compile/build?** Run the build command.
33
+ 2. **Do tests pass?** Run the test suite.
34
+ 3. **Edge cases**: Empty inputs, very large inputs, missing files, invalid data.
35
+ 4. **Error handling**: What happens when things go wrong?
36
+ 5. **Consistency**: Does the change break other parts of the codebase?
37
+
38
+ ## Output Format
39
+
40
+ After running your checks, output a verdict in EXACTLY this format:
41
+
42
+ VERDICT: PASS|FAIL|PARTIAL
43
+
44
+ Then explain:
45
+ - What you tested
46
+ - What passed
47
+ - What failed (if any)
48
+ - Specific issues to fix (if FAIL)
49
+
50
+ Keep it concise — focus on actionable findings, not narration.`;
51
// ─── Thresholds ──────────────────────────────────────────────────────────
/** Tool names whose use marks a turn as having done substantial work. */
const WRITE_TOOLS = new Set(['Edit', 'Write', 'Bash']);
/** A turn needs at least this many tool calls before verification is considered. */
const MIN_TOOL_CALLS = 3;
/** Token ceiling for the verification request (prevents runaway spend). */
const MAX_VERIFICATION_TOKENS = 8192;
// ─── Decision Logic ──────────────────────────────────────────────────────
/**
 * Decide whether the verification gate should run for the turn that just ended.
 *
 * All three conditions must hold: the turn made at least MIN_TOOL_CALLS tool
 * calls, at least one of the tools used is in WRITE_TOOLS, and the user input
 * is neither a slash command nor shorter than 20 characters.
 *
 * @param {number} turnToolCalls - Total tool calls made during the turn.
 * @param {Map<string, number>} turnToolCounts - Per-tool call counts, keyed by tool name.
 * @param {string} userInput - The user message that started the turn.
 * @returns {boolean} True when verification should run.
 */
export function shouldVerify(turnToolCalls, turnToolCounts, userInput) {
    if (turnToolCalls < MIN_TOOL_CALLS) {
        return false;
    }
    const usedWriteTool = [...turnToolCounts].some(([toolName]) => WRITE_TOOLS.has(toolName));
    if (!usedWriteTool) {
        return false;
    }
    // Slash commands and very short prompts are treated as quick requests.
    const normalized = userInput.toLowerCase();
    const isQuickRequest = normalized.startsWith('/') || normalized.length < 20;
    return !isQuickRequest;
}
83
// ─── Read-only tool filter ───────────────────────────────────────────────
/** Names of the tools the verification agent is permitted to see. */
const READ_ONLY_TOOLS = new Set(['Read', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch']);
/**
 * Build the reduced tool map handed to the verification agent.
 *
 * Only handlers whose name appears in READ_ONLY_TOOLS are kept, in their
 * original order. Bash stays available (for running tests/builds) while
 * Edit/Write are left out.
 *
 * @param {Map<string, object>} handlers - All capability handlers, keyed by tool name.
 * @returns {Map<string, object>} A new map holding only the permitted handlers.
 */
export function getVerificationTools(handlers) {
    const permitted = [...handlers].filter(([toolName]) => READ_ONLY_TOOLS.has(toolName));
    return new Map(permitted);
}
98
// ─── Run Verification ────────────────────────────────────────────────────
/**
 * Run the verification agent on the current conversation state.
 *
 * Summarizes the most recent work from `history`, sends it in a single
 * completion request to a fixed free model, and parses a verdict plus issue
 * bullets out of the reply. This function never throws: any failure is
 * reported as a SKIPPED result so the main flow is not blocked.
 *
 * @param {Array<object>} history - Conversation messages to summarize.
 * @param {Map<string, object>} handlers - All capability handlers; filtered
 *   down before their `spec`s are sent as tools.
 * @param {object} client - Model client exposing `complete(request)`.
 * @param {object} config - `{ model, workDir, abortSignal, onEvent? }`. Only
 *   `onEvent` is read here; it receives a "Verifying..." text_delta.
 * @returns {Promise<{verdict: string, summary: string, issues: string[]}>}
 *   verdict is PASS, FAIL, PARTIAL (also used when no verdict line is found)
 *   or SKIPPED; summary is the first 500 characters of the reply.
 */
export async function runVerification(history, handlers, client, config) {
    const verificationTools = getVerificationTools(handlers);
    // Build verification prompt from recent history context
    const recentWork = extractRecentWork(history);
    if (!recentWork) {
        return { verdict: 'SKIPPED', summary: 'No recent work to verify.', issues: [] };
    }
    const verificationHistory = [
        {
            role: 'user',
            content: `The following work was just completed. Your job is to VERIFY it by running adversarial checks.\n\n${recentWork}\n\nRun build, tests, and edge case checks. Output your VERDICT.`,
        },
    ];
    config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
    // Use cheap model for verification
    const verificationModel = 'nvidia/nemotron-ultra-253b'; // Free model to keep cost zero
    try {
        // Simple single-turn verification call
        const response = await client.complete({
            model: verificationModel,
            system: VERIFICATION_PROMPT,
            messages: verificationHistory,
            tools: Array.from(verificationTools.values()).map((h) => h.spec),
            max_tokens: MAX_VERIFICATION_TOKENS,
        });
        // Collect only real text; skip null parts and text parts without a string payload.
        let responseText = '';
        if (response.content) {
            for (const part of response.content) {
                if (typeof part === 'string') {
                    responseText += part;
                }
                else if (part && part.type === 'text' && typeof part.text === 'string') {
                    responseText += part.text;
                }
            }
        }
        // Parse verdict; a reply without a verdict line is treated as PARTIAL.
        const verdictMatch = responseText.match(/VERDICT:\s*(PASS|FAIL|PARTIAL)/i);
        const verdict = verdictMatch ? verdictMatch[1].toUpperCase() : 'PARTIAL';
        // Extract issues: bullet lines that start with a failure keyword or mention "fail".
        const issues = responseText
            .split('\n')
            .filter((l) => /^[-•*]\s*(FAIL|ERROR|BUG|ISSUE|PROBLEM)/i.test(l) || /^[-•*]\s+.*fail/i.test(l))
            .map((l) => l.replace(/^[-•*]\s*/, '').trim());
        // An explicit FAIL must stay actionable even when the reply is not
        // formatted as matching bullets; otherwise callers that gate on
        // issues.length would drop the failure silently.
        if (verdict === 'FAIL' && issues.length === 0) {
            issues.push('Verifier reported FAIL without itemized issues — see summary.');
        }
        return { verdict, summary: responseText.slice(0, 500), issues };
    }
    catch (err) {
        // Verification failure should never block the main flow
        const reason = err instanceof Error ? err.message : String(err);
        return {
            verdict: 'SKIPPED',
            summary: `Verification error: ${reason}`,
            issues: [],
        };
    }
}
163
/**
 * Extract a summary of recent work from the conversation history.
 *
 * Walks backwards from the newest message until it reaches a plain-string
 * user message (the start of the current turn) or has collected at least 10
 * entries (the limit is checked between messages, not within one). Assistant
 * text becomes `Assistant: …` (≤500 chars), tool calls become
 * `Tool: name(args)` (args ≤200 chars) and tool results become `Result: …`
 * (≤300 chars).
 *
 * Entries are returned in chronological order, both across messages and
 * within each message.
 *
 * @param {Array<{role: string, content: unknown}>} history - Conversation messages.
 * @returns {string|null} Entries joined by blank lines, or null when nothing was found.
 */
function extractRecentWork(history) {
    const parts = [];
    let found = 0;
    for (let i = history.length - 1; i >= 0 && found < 10; i--) {
        const { role, content } = history[i];
        // Stop at a pure user message boundary (not a tool_result user message)
        if (role === 'user' && !Array.isArray(content))
            break;
        if (!Array.isArray(content))
            continue;
        // Gather this message's entries in order, then prepend them as a group.
        // Unshifting one by one would reverse their order within the message.
        const entries = [];
        for (const part of content) {
            // typeof null is 'object', so exclude null explicitly.
            if (part === null || typeof part !== 'object')
                continue;
            if (role === 'assistant') {
                if (part.type === 'text' && typeof part.text === 'string' && part.text) {
                    entries.push(`Assistant: ${part.text.slice(0, 500)}`);
                }
                else if (part.type === 'tool_use') {
                    // JSON.stringify returns undefined when input is undefined.
                    const args = JSON.stringify(part.input) ?? '';
                    entries.push(`Tool: ${part.name}(${args.slice(0, 200)})`);
                }
            }
            else if (role === 'user' && part.type === 'tool_result') {
                let output = '';
                if (typeof part.content === 'string') {
                    output = part.content;
                }
                else if (Array.isArray(part.content)) {
                    output = part.content.map((c) => c?.text || '').join('\n');
                }
                entries.push(`Result: ${output.slice(0, 300)}`);
            }
        }
        parts.unshift(...entries);
        found += entries.length;
    }
    return parts.length > 0 ? parts.join('\n\n') : null;
}
@@ -130,8 +130,11 @@ export async function startCommand(options) {
130
130
  }
131
131
  }
132
132
  }
133
- // Build capabilities (built-in + MCP + sub-agent)
133
+ // Build capabilities (built-in + MCP + sub-agent + MoA)
134
134
  const subAgent = createSubAgentCapability(apiUrl, chain, allCapabilities);
135
+ // Register MoA tool config (needs API URL for parallel model queries)
136
+ const { registerMoAConfig } = await import('../tools/moa.js');
137
+ registerMoAConfig(apiUrl, chain);
135
138
  const capabilities = [...allCapabilities, ...mcpTools, subAgent];
136
139
  // Validate tool descriptions (self-evolution: detect SearchX-style description bugs)
137
140
  if (options.debug) {
@@ -236,6 +239,18 @@ async function runWithInkUI(agentConfig, model, workDir, version, walletInfo, on
236
239
  catch { /* extraction is best-effort */ }
237
240
  }
238
241
  await disconnectMcpServers();
242
+ // Session summary — show cost and usage before goodbye
243
+ try {
244
+ const { getStatsSummary } = await import('../stats/tracker.js');
245
+ const { stats, saved } = getStatsSummary();
246
+ if (stats.totalRequests > 0) {
247
+ const cost = stats.totalCostUsd.toFixed(4);
248
+ const savedStr = saved > 0.001 ? ` · saved $${saved.toFixed(2)} vs Opus` : '';
249
+ const tokens = `${(stats.totalInputTokens / 1000).toFixed(0)}k in / ${(stats.totalOutputTokens / 1000).toFixed(0)}k out`;
250
+ console.log(chalk.dim(`\n Session: ${stats.totalRequests} requests · $${cost} USDC${savedStr} · ${tokens}`));
251
+ }
252
+ }
253
+ catch { /* stats unavailable */ }
239
254
  console.log(chalk.dim('\nGoodbye.\n'));
240
255
  }
241
256
  // ─── Basic readline UI (piped input) ───────────────────────────────────────
@@ -288,6 +303,18 @@ async function runWithBasicUI(agentConfig, model, workDir) {
288
303
  console.error(chalk.red(`\nError: ${err.message}`));
289
304
  }
290
305
  }
306
+ // Session summary for piped mode
307
+ try {
308
+ const { getStatsSummary } = await import('../stats/tracker.js');
309
+ const { stats, saved } = getStatsSummary();
310
+ if (stats.totalRequests > 0) {
311
+ const cost = stats.totalCostUsd.toFixed(4);
312
+ const savedStr = saved > 0.001 ? ` · saved $${saved.toFixed(2)} vs Opus` : '';
313
+ const tokens = `${(stats.totalInputTokens / 1000).toFixed(0)}k in / ${(stats.totalOutputTokens / 1000).toFixed(0)}k out`;
314
+ console.error(`Session: ${stats.totalRequests} requests · $${cost} USDC${savedStr} · ${tokens}`);
315
+ }
316
+ }
317
+ catch { /* stats unavailable */ }
291
318
  ui.printGoodbye();
292
319
  flushStats();
293
320
  }
@@ -14,6 +14,11 @@ export declare function bootstrapFromClaudeConfig(client: ModelClient): Promise<
14
14
  * Runs asynchronously — caller should fire-and-forget.
15
15
  */
16
16
  export declare function extractLearnings(history: Dialogue[], sessionId: string, client: ModelClient): Promise<void>;
17
+ /**
18
+ * Try to extract a reusable skill from the recent work.
19
+ * Called from maybeMidSessionExtract when enough tool calls happened.
20
+ */
21
+ export declare function maybeExtractSkill(history: Dialogue[], turnToolCalls: number, sessionId: string, client: ModelClient): Promise<void>;
17
22
  /**
18
23
  * Check if mid-session extraction should run, and if so, run it in background.
19
24
  * Called from the agent loop after tool execution completes.
@@ -5,7 +5,7 @@
5
5
  import fs from 'node:fs';
6
6
  import path from 'node:path';
7
7
  import os from 'node:os';
8
- import { loadLearnings, mergeLearning, saveLearnings } from './store.js';
8
+ import { loadLearnings, mergeLearning, saveLearnings, loadSkills, saveSkill } from './store.js';
9
9
  // Free models for learning extraction — JSON extraction is simple enough.
10
10
  // Ordered by reliability: try the best free model first, fall back to others.
11
11
  const EXTRACTION_MODELS = [
@@ -242,6 +242,120 @@ async function runExtraction(condensed, sessionId, client) {
242
242
  }
243
243
  saveLearnings(existing);
244
244
  }
245
// ─── Skill extraction (procedural memory) ─────────────────────────────────
// After complex tasks, detect reusable procedures and save as skills.
const SKILL_EXTRACTION_PROMPT = `You are analyzing a conversation where an AI agent completed a complex multi-step task. Decide if this task pattern should be saved as a reusable skill (procedure).

Save a skill when:
1. The task involved 5+ distinct steps that could be repeated
2. The steps are generalizable (not one-off fixes for specific bugs)
3. Future similar tasks would benefit from having the procedure documented

If the task IS worth saving, output in this exact format (no markdown fences):
{"skill":{"name":"kebab-case-name","description":"One-line description","triggers":["keyword1","keyword2"],"steps":"## Steps\\n1. First step\\n2. Second step\\n..."}}

If NOT worth saving, output exactly:
{"skill":null}

Be selective — only save genuinely reusable multi-step procedures.`;
/** Minimum tool calls before skill extraction is attempted. */
const MIN_TOOL_CALLS_FOR_SKILL = 5;
/**
 * Condense the last 20 messages into a compact transcript for skill extraction.
 * Keeps tool names, arguments and results, and stops adding lines once roughly
 * 6000 characters have been collected. Malformed parts are skipped, not thrown on.
 */
function condenseForSkill(history) {
    const CAP = 6000;
    const lines = [];
    let chars = 0;
    const add = (line) => {
        lines.push(line);
        chars += line.length;
    };
    for (const msg of history.slice(-20)) {
        if (chars >= CAP)
            break;
        if (typeof msg.content === 'string') {
            add(`${msg.role}: ${msg.content.slice(0, 300)}`);
            continue;
        }
        if (!Array.isArray(msg.content))
            continue;
        for (const p of msg.content) {
            if (chars >= CAP)
                break;
            if (p === null || typeof p !== 'object')
                continue;
            if (p.type === 'text') {
                add(`${msg.role}: ${String(p.text ?? '').slice(0, 200)}`);
            }
            else if (p.type === 'tool_use') {
                // JSON.stringify returns undefined when input is undefined.
                add(`tool: ${p.name}(${(JSON.stringify(p.input) ?? '').slice(0, 150)})`);
            }
            else if (p.type === 'tool_result') {
                // Tool results may be a plain string or an array of text parts.
                let text = '';
                if (typeof p.content === 'string') {
                    text = p.content;
                }
                else if (Array.isArray(p.content)) {
                    text = p.content.map((c) => c?.text || '').join('\n');
                }
                add(`result: ${text.slice(0, 100)}`);
            }
        }
    }
    return lines.join('\n\n');
}
/**
 * Try to extract a reusable skill from the recent work.
 * Called from maybeMidSessionExtract when enough tool calls happened.
 *
 * Best-effort: condensing, the model calls, JSON parsing and saving all run
 * inside one try block, so malformed history or model output never rejects.
 *
 * @param {Array<object>} history - Conversation messages.
 * @param {number} turnToolCalls - Tool-call count; below MIN_TOOL_CALLS_FOR_SKILL nothing happens.
 * @param {string} sessionId - Recorded on the saved skill as `source_session`.
 * @param {object} client - Model client exposing `complete(request)`.
 * @returns {Promise<void>}
 */
export async function maybeExtractSkill(history, turnToolCalls, sessionId, client) {
    if (turnToolCalls < MIN_TOOL_CALLS_FOR_SKILL)
        return;
    try {
        // Condense recent history with tool details (skills need tool context)
        const condensed = condenseForSkill(history);
        if (condensed.length < 200)
            return;
        let text = '';
        for (const model of EXTRACTION_MODELS) {
            try {
                const response = await client.complete({
                    model,
                    messages: [{ role: 'user', content: condensed }],
                    system: SKILL_EXTRACTION_PROMPT,
                    max_tokens: 1500,
                    temperature: 0.2,
                });
                text = response.content
                    .filter((p) => p?.type === 'text')
                    .map((p) => p.text)
                    .join('');
                // An empty reply is as useless as an error — try the next model.
                if (text)
                    break;
            }
            catch {
                continue;
            }
        }
        if (!text)
            return;
        // Parse JSON: take the outermost {...} span, tolerating surrounding prose.
        const start = text.indexOf('{');
        const end = text.lastIndexOf('}');
        if (start === -1 || end <= start)
            return;
        const parsed = JSON.parse(text.slice(start, end + 1));
        if (!parsed.skill)
            return;
        const { name, description, triggers, steps } = parsed.skill;
        // name is compared against stored skills below, so it must be a real string.
        if (typeof name !== 'string' || !name || !description || !steps)
            return;
        // Check for duplicate skills
        const existing = loadSkills();
        if (existing.some((s) => s.name === name))
            return;
        saveSkill({
            name,
            description,
            triggers: Array.isArray(triggers) ? triggers : [],
            steps,
            created: new Date().toISOString().split('T')[0],
            uses: 0,
            source_session: sessionId,
        });
    }
    catch {
        // Skill extraction is best-effort
    }
}
245
359
  const midSessionState = {
246
360
  lastExtractionTokens: 0,
247
361
  lastExtractionToolCalls: 0,
@@ -289,7 +403,9 @@ export function maybeMidSessionExtract(history, estimatedTokens, totalToolCalls,
289
403
  const condensed = condenseHistory(history);
290
404
  if (condensed.length < 100)
291
405
  return;
292
- // Run in background — errors are silently swallowed
406
+ // Run learnings + skill extraction in background — errors are silently swallowed
293
407
  runExtraction(condensed, `${sessionId}:mid-${midSessionState.extractionCount}`, client)
294
408
  .catch(() => { });
409
+ maybeExtractSkill(history, totalToolCalls, sessionId, client)
410
+ .catch(() => { });
295
411
  }
@@ -1,3 +1,3 @@
1
- export type { Learning, LearningCategory, ExtractionResult } from './types.js';
2
- export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt } from './store.js';
3
- export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract } from './extractor.js';
1
+ export type { Learning, LearningCategory, ExtractionResult, Skill } from './types.js';
2
+ export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt, loadSkills, saveSkill, matchSkills, formatSkillsForPrompt } from './store.js';
3
+ export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract, maybeExtractSkill } from './extractor.js';
@@ -1,2 +1,2 @@
1
- export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt } from './store.js';
2
- export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract } from './extractor.js';
1
+ export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt, loadSkills, saveSkill, matchSkills, formatSkillsForPrompt } from './store.js';
2
+ export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract, maybeExtractSkill } from './extractor.js';
@@ -2,7 +2,7 @@
2
2
  * Persistence layer for per-user learnings.
3
3
  * Stored as JSONL at ~/.blockrun/learnings.jsonl.
4
4
  */
5
- import type { Learning, LearningCategory } from './types.js';
5
+ import type { Learning, LearningCategory, Skill } from './types.js';
6
6
  export declare function loadLearnings(): Learning[];
7
7
  export declare function saveLearnings(learnings: Learning[]): void;
8
8
  export declare function mergeLearning(existing: Learning[], newEntry: {
@@ -13,3 +13,13 @@ export declare function mergeLearning(existing: Learning[], newEntry: {
13
13
  }): Learning[];
14
14
  export declare function decayLearnings(learnings: Learning[]): Learning[];
15
15
  export declare function formatForPrompt(learnings: Learning[]): string;
16
+ /** Load all skills from disk. */
17
+ export declare function loadSkills(): Skill[];
18
+ /** Save a new skill to disk. */
19
+ export declare function saveSkill(skill: Skill): void;
20
+ /** Bump use count for a skill. */
21
+ export declare function bumpSkillUse(skill: Skill): void;
22
+ /** Find skills relevant to a user message, by trigger matching. */
23
+ export declare function matchSkills(input: string, skills: Skill[]): Skill[];
24
+ /** Format matched skills for system prompt injection. */
25
+ export declare function formatSkillsForPrompt(skills: Skill[]): string;
@@ -157,3 +157,103 @@ export function formatForPrompt(learnings) {
157
157
  return '';
158
158
  return '# Personal Context\nLearned from previous sessions:\n\n' + sections.join('\n\n');
159
159
  }
160
+ // ─── Skills (procedural memory) ──────────────────────────────────────────
161
+ // Stored as individual markdown files in ~/.blockrun/skills/
162
+ // Larger than learnings, conditionally injected based on trigger matching.
163
+ const SKILLS_DIR = path.join(BLOCKRUN_DIR, 'skills');
164
+ const MAX_SKILLS_IN_PROMPT = 5;
165
+ const MAX_SKILL_CHARS = 1500;
166
/** Create the skills directory (including missing parent directories) unless it already exists. */
function ensureSkillsDir() {
    const alreadyPresent = fs.existsSync(SKILLS_DIR);
    if (alreadyPresent) {
        return;
    }
    fs.mkdirSync(SKILLS_DIR, { recursive: true });
}
171
/**
 * Load all skills from disk.
 *
 * Reads every `*.md` file in SKILLS_DIR and parses it with parseSkillFile.
 * Loading is best-effort: a file that cannot be read or parsed is skipped, and
 * any directory-level failure yields an empty list instead of an exception.
 *
 * @returns {Array<object>} The skills that parsed successfully (possibly empty).
 */
export function loadSkills() {
    const skills = [];
    try {
        // Kept inside the try: if the directory cannot be created (read-only or
        // unwritable home), loading must degrade to "no skills" rather than throw.
        ensureSkillsDir();
        const files = fs.readdirSync(SKILLS_DIR).filter(f => f.endsWith('.md'));
        for (const file of files) {
            try {
                const raw = fs.readFileSync(path.join(SKILLS_DIR, file), 'utf-8');
                const skill = parseSkillFile(raw);
                if (skill)
                    skills.push(skill);
            }
            catch { /* skip unreadable or corrupt skill file */ }
        }
    }
    catch { /* directory missing or inaccessible — treat as no skills */ }
    return skills;
}
189
/**
 * Parse the text of one skill file.
 *
 * Expected layout: a front-matter header delimited by `---` lines containing
 * `name`, `description`, `triggers: [a, b]`, `created`, `uses` and
 * `source_session` fields (one per line), followed by the steps body.
 *
 * @param {string} raw - Full file contents.
 * @returns {object|null} The parsed skill, or null when there is no front
 *   matter or the `name` field is missing/empty.
 */
function parseSkillFile(raw) {
    // Strip a BOM and normalise CRLF so files edited on Windows still parse.
    const text = raw.replace(/^\uFEFF/, '').replace(/\r\n/g, '\n');
    // The body (and the newline before it) is optional, so a header-only file is accepted.
    const m = text.match(/^---\n([\s\S]*?)\n---(?:\n([\s\S]*))?$/);
    if (!m)
        return null;
    const fm = m[1];
    // `[ \t]*` rather than `\s*`: `\s` also matches newlines, which would let an
    // empty value (e.g. "description: ") capture the following line instead.
    const field = (key) => fm.match(new RegExp(`^${key}:[ \\t]*(.*)$`, 'm'))?.[1]?.trim() || '';
    const name = field('name');
    if (!name)
        return null;
    const triggersRaw = fm.match(/^triggers:[ \t]*\[([^\]]*)\]/m)?.[1] || '';
    const triggers = triggersRaw.split(',').map(t => t.trim()).filter(Boolean);
    const usesText = field('uses');
    // Anything that is not a plain non-negative integer counts as 0 uses.
    const uses = /^\d+$/.test(usesText) ? Number.parseInt(usesText, 10) : 0;
    return {
        name,
        description: field('description'),
        triggers,
        steps: (m[2] ?? '').trim(),
        created: field('created'),
        uses,
        source_session: field('source_session'),
    };
}
205
/**
 * Save a new skill to disk.
 *
 * Writes `<slug>.md` into SKILLS_DIR, where the slug is the lower-cased skill
 * name with every character outside `[a-z0-9-]` replaced by `-`. The file is a
 * `---`-delimited front-matter header followed by the steps body.
 *
 * @param {object} skill - Skill with name, description, triggers, steps,
 *   created, uses and source_session.
 */
export function saveSkill(skill) {
    ensureSkillsDir();
    // Front-matter fields are read back one per line, so a raw line break inside
    // a value would corrupt the header. Each break becomes a single space.
    const oneLine = (value) => String(value).replace(/[\r\n]/g, ' ');
    // Triggers are stored as `[a, b]` and split on commas when parsed; strip the
    // characters that would change how the list is read back.
    const triggers = skill.triggers
        .map(t => oneLine(t).replace(/[\[\],]/g, ' ').trim())
        .filter(Boolean);
    const filename = skill.name.replace(/[^a-z0-9-]/gi, '-').toLowerCase() + '.md';
    const fm = [
        '---',
        `name: ${oneLine(skill.name)}`,
        `description: ${oneLine(skill.description)}`,
        `triggers: [${triggers.join(', ')}]`,
        `created: ${oneLine(skill.created)}`,
        `uses: ${skill.uses}`,
        `source_session: ${oneLine(skill.source_session)}`,
        '---',
    ].join('\n');
    fs.writeFileSync(path.join(SKILLS_DIR, filename), `${fm}\n${skill.steps}\n`);
}
221
/**
 * Bump use count for a skill.
 *
 * Increments the `uses:` counter stored in the skill's file. The new value is
 * derived from the number currently on disk rather than from `skill.uses`, so
 * repeated bumps through the same (possibly stale) in-memory object keep
 * accumulating. Failures are ignored: the counter is non-critical.
 *
 * @param {object} skill - Skill whose name identifies the file to update.
 */
export function bumpSkillUse(skill) {
    const filename = skill.name.replace(/[^a-z0-9-]/gi, '-').toLowerCase() + '.md';
    const fp = path.join(SKILLS_DIR, filename);
    try {
        const raw = fs.readFileSync(fp, 'utf-8');
        const bumped = raw.replace(/^uses:\s*(\d+)$/m, (_line, count) => `uses: ${Number.parseInt(count, 10) + 1}`);
        // No counter line found: leave the file untouched.
        if (bumped !== raw)
            fs.writeFileSync(fp, bumped);
    }
    catch { /* non-critical */ }
}
231
/**
 * Find skills relevant to a user message, by trigger matching.
 *
 * Scoring: +2 for every trigger contained in the message, +3 when the skill
 * name is contained in the message (both case-insensitive). Skills with no
 * textual match are dropped. Matching skills then receive a usage bonus of
 * 0.5 per recorded use, capped at 3, which serves only to rank them.
 *
 * @param {string} input - The user message.
 * @param {Array<object>} skills - Candidate skills (name, triggers, uses).
 * @param {number} [limit=MAX_SKILLS_IN_PROMPT] - Maximum number of skills returned.
 * @returns {Array<object>} Matching skills, best score first.
 */
export function matchSkills(input, skills, limit = MAX_SKILLS_IN_PROMPT) {
    const lower = (input ?? '').toLowerCase();
    const scored = [];
    for (const s of skills) {
        let score = 0;
        for (const t of s.triggers) {
            // An empty trigger is "contained" in every string; never let it match.
            if (t && lower.includes(t.toLowerCase()))
                score += 2;
        }
        if (lower.includes(s.name.toLowerCase()))
            score += 3;
        // The usage bonus must not create a match on its own, otherwise every
        // previously used skill would be returned for every message.
        if (score === 0)
            continue;
        score += Math.min(s.uses * 0.5, 3);
        scored.push({ skill: s, score });
    }
    return scored.sort((a, b) => b.score - a.score).slice(0, limit).map(m => m.skill);
}
249
/**
 * Format matched skills for system prompt injection.
 *
 * Produces a "# Learned Skills" section with one "## <name>" entry per skill:
 * the italicised description (omitted when empty) followed by the steps, which
 * are cut to `maxChars` characters and suffixed with "…" when longer.
 *
 * @param {Array<object>} skills - Skills to render (name, description, steps).
 * @param {number} [maxChars=MAX_SKILL_CHARS] - Maximum characters of steps per skill.
 * @returns {string} The prompt section, or '' when there are no skills.
 */
export function formatSkillsForPrompt(skills, maxChars = MAX_SKILL_CHARS) {
    if (skills.length === 0)
        return '';
    const parts = ['# Learned Skills\nProcedures from previous experience — use when relevant:\n'];
    for (const s of skills) {
        const body = s.steps.length > maxChars ? s.steps.slice(0, maxChars) + '\n…' : s.steps;
        // An empty description would otherwise render as a stray "**" line.
        const descriptionLine = s.description ? `*${s.description}*\n` : '';
        parts.push(`## ${s.name}\n${descriptionLine}\n${body}`);
    }
    return parts.join('\n\n');
}
@@ -21,4 +21,20 @@ export interface ExtractionResult {
21
21
  category: LearningCategory;
22
22
  confidence: number;
23
23
  }>;
24
+ /** Procedural skills extracted from complex task patterns. */
25
+ skills?: Array<{
26
+ name: string;
27
+ description: string;
28
+ triggers: string[];
29
+ steps: string;
30
+ }>;
31
+ }
32
+ export interface Skill {
33
+ name: string;
34
+ description: string;
35
+ triggers: string[];
36
+ steps: string;
37
+ created: string;
38
+ uses: number;
39
+ source_session: string;
24
40
  }
@@ -118,9 +118,34 @@ async function execute(input, ctx) {
118
118
  : '';
119
119
  return { output: `No files matched pattern "${pattern}" in ${baseDir}.${hint}` };
120
120
  }
121
- let output = sorted.join('\n');
121
+ // Group by directory for compact output (saves 30-40% tokens on large results)
122
+ let output;
123
+ if (sorted.length > 10) {
124
+ const grouped = new Map();
125
+ for (const p of sorted) {
126
+ const dir = path.dirname(p);
127
+ if (!grouped.has(dir))
128
+ grouped.set(dir, []);
129
+ grouped.get(dir).push(path.basename(p));
130
+ }
131
+ const parts = [];
132
+ for (const [dir, files] of grouped) {
133
+ if (files.length === 1) {
134
+ parts.push(`${dir}/${files[0]}`);
135
+ }
136
+ else {
137
+ parts.push(`${dir}/ (${files.length} files)`);
138
+ for (const f of files)
139
+ parts.push(` ${f}`);
140
+ }
141
+ }
142
+ output = parts.join('\n');
143
+ }
144
+ else {
145
+ output = sorted.join('\n');
146
+ }
122
147
  if (sorted.length >= MAX_RESULTS) {
123
- output += `\n\n... (limited to ${MAX_RESULTS} results. Use a more specific pattern to narrow results.)`;
148
+ output += `\n\n... (limited to ${MAX_RESULTS} results. Use a more specific pattern.)`;
124
149
  }
125
150
  // Cap total output length to prevent context bloat
126
151
  if (output.length > MAX_OUTPUT_CHARS) {
@@ -135,7 +160,7 @@ async function execute(input, ctx) {
135
160
  }
136
161
  const remaining = lines.length - count;
137
162
  if (remaining > 0) {
138
- output = `${trimmed}\n... (${remaining} more paths not shown — use a more specific pattern)`;
163
+ output = `${trimmed}\n... (${remaining} more not shown — use a more specific pattern)`;
139
164
  }
140
165
  }
141
166
  return { output };
@@ -15,6 +15,7 @@ import { askUserCapability } from './askuser.js';
15
15
  import { tradingSignalCapability, tradingMarketCapability } from './trading.js';
16
16
  import { searchXCapability } from './searchx.js';
17
17
  import { postToXCapability } from './posttox.js';
18
+ import { moaCapability } from './moa.js';
18
19
  /** All capabilities available to the Franklin agent (excluding sub-agent, which needs config). */
19
20
  export const allCapabilities = [
20
21
  readCapability,
@@ -32,6 +33,7 @@ export const allCapabilities = [
32
33
  tradingMarketCapability,
33
34
  searchXCapability,
34
35
  postToXCapability,
36
+ moaCapability,
35
37
  ];
36
38
  export { readCapability, writeCapability, editCapability, bashCapability, globCapability, grepCapability, webFetchCapability, webSearchCapability, taskCapability, };
37
39
  export { createSubAgentCapability } from './subagent.js';
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Mixture-of-Agents (MoA) — query multiple models in parallel, aggregate with a strong model.
3
+ *
4
+ * How it works:
5
+ * 1. Send the same prompt to N reference models (cheap/free) in parallel
6
+ * 2. Collect all responses
7
+ * 3. Send all responses + the original prompt to a strong aggregator model
8
+ * 4. Aggregator synthesizes the best answer from all references
9
+ *
10
+ * This produces higher-quality answers than any single model for complex questions.
11
+ * Inspired by the Mixture-of-Agents architecture from Together.ai research.
12
+ */
13
+ import type { CapabilityHandler } from '../agent/types.js';
14
+ export declare const moaCapability: CapabilityHandler;
15
+ /** Register the API URL for MoA tool (called during agent setup). */
16
+ export declare function registerMoAConfig(apiUrl: string, chain: 'base' | 'solana'): void;
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Mixture-of-Agents (MoA) — query multiple models in parallel, aggregate with a strong model.
3
+ *
4
+ * How it works:
5
+ * 1. Send the same prompt to N reference models (cheap/free) in parallel
6
+ * 2. Collect all responses
7
+ * 3. Send all responses + the original prompt to a strong aggregator model
8
+ * 4. Aggregator synthesizes the best answer from all references
9
+ *
10
+ * This produces higher-quality answers than any single model for complex questions.
11
+ * Inspired by the Mixture-of-Agents architecture from Together.ai research.
12
+ */
13
+ import { ModelClient } from '../agent/llm.js';
14
+ // ─── Configuration ────────────────────────────────────────────────────────
15
+ /** Reference models — diverse, cheap/free models for parallel queries. */
16
+ const REFERENCE_MODELS = [
17
+ 'nvidia/nemotron-ultra-253b', // Free, strong reasoning
18
+ 'nvidia/qwen3-coder-480b', // Free, strong coding
19
+ 'google/gemini-2.5-flash', // Fast, cheap
20
+ 'deepseek/deepseek-chat', // Cheap, good reasoning
21
+ ];
22
+ /** Aggregator model — strong model that synthesizes the best answer. */
23
+ const AGGREGATOR_MODEL = 'anthropic/claude-sonnet-4.6';
24
+ /** Max tokens per reference response. */
25
+ const REFERENCE_MAX_TOKENS = 4096;
26
+ /** Max tokens for aggregator. */
27
+ const AGGREGATOR_MAX_TOKENS = 8192;
28
+ /** Timeout per reference model call (ms). */
29
+ const REFERENCE_TIMEOUT_MS = 60_000;
30
+ // ─── Implementation ──────────────────────────────────────────────────────
31
+ // These will be injected at registration time
32
+ let registeredApiUrl = '';
33
+ let registeredChain = 'base';
34
/**
 * Concatenate the textual parts of a completion response.
 * Parts may be plain strings or `{ type: 'text', text }` objects; other parts are ignored.
 */
function extractResponseText(response) {
    let text = '';
    for (const part of response?.content ?? []) {
        if (typeof part === 'string')
            text += part;
        else if (part?.type === 'text')
            text += part.text;
    }
    return text.trim();
}
/**
 * Query one reference model. Never rejects: the outcome is reported as
 * `{ model, text, error }`, where `error` is null only for a non-empty reply.
 * The call is aborted after REFERENCE_TIMEOUT_MS or when `parentSignal` aborts.
 */
async function queryReferenceModel(client, model, prompt, parentSignal) {
    const controller = new AbortController();
    const abortFromParent = () => controller.abort();
    const timer = setTimeout(() => controller.abort(), REFERENCE_TIMEOUT_MS);
    try {
        // Propagate cancellation of the whole tool call to this request.
        // NOTE(review): assumes ctx.abortSignal is a standard AbortSignal when present.
        if (parentSignal?.aborted)
            controller.abort();
        else
            parentSignal?.addEventListener('abort', abortFromParent, { once: true });
        const response = await client.complete({
            model,
            messages: [{ role: 'user', content: prompt }],
            max_tokens: REFERENCE_MAX_TOKENS,
            stream: false,
        }, controller.signal);
        const text = extractResponseText(response);
        return { model, text, error: text ? null : 'empty response' };
    }
    catch (err) {
        return { model, text: '', error: err instanceof Error ? err.message : String(err) };
    }
    finally {
        clearTimeout(timer);
        parentSignal?.removeEventListener?.('abort', abortFromParent);
    }
}
/**
 * Run a Mixture-of-Agents query.
 *
 * Sends `input.prompt` to every reference model in parallel, then asks the
 * aggregator model to synthesize one answer from the replies that succeeded.
 *
 * @param {object} input - `prompt` (required), optional `models` (array of
 *   model IDs), `aggregator` (model ID) and `include_reasoning` (boolean).
 * @param {object} ctx - Tool context; `onProgress` and `abortSignal` are used when present.
 * @returns {Promise<{output: string, isError?: boolean}>} The synthesized answer
 *   with a footer naming the models, or an error result when the prompt is
 *   missing, every reference model failed, or aggregation failed (in which case
 *   the first successful reference reply is included).
 */
async function execute(input, ctx) {
    const { prompt, models, aggregator, include_reasoning } = input;
    if (!prompt) {
        return { output: 'Error: prompt is required', isError: true };
    }
    // Fall back to the defaults when the override is missing, not an array, or empty.
    const requestedModels = Array.isArray(models)
        ? models.filter(m => typeof m === 'string' && m.trim())
        : [];
    const referenceModels = requestedModels.length > 0 ? requestedModels : REFERENCE_MODELS;
    const aggregatorModel = aggregator || AGGREGATOR_MODEL;
    const client = new ModelClient({
        apiUrl: registeredApiUrl,
        chain: registeredChain,
    });
    ctx.onProgress?.('Querying reference models...');
    // Step 1: Query all reference models in parallel
    const references = await Promise.all(referenceModels.map(model => queryReferenceModel(client, model, prompt, ctx.abortSignal)));
    // Filter out failures
    const successRefs = references.filter(r => r.text && !r.error);
    if (successRefs.length === 0) {
        const errors = references.map(r => `${r.model}: ${r.error}`).join('\n');
        return { output: `All reference models failed:\n${errors}`, isError: true };
    }
    ctx.onProgress?.(`${successRefs.length}/${referenceModels.length} responded, aggregating...`);
    // Step 2: Build aggregation prompt
    const refSection = successRefs.map((r, i) => `## Response ${i + 1} (${r.model})\n\n${r.text}`).join('\n\n---\n\n');
    const aggregationPrompt = `You have been given ${successRefs.length} responses to the same question from different AI models. Your job is to synthesize the BEST possible answer by:

1. Identifying the strongest insights from each response
2. Resolving any contradictions (prefer verifiable facts)
3. Combining the best parts into a single, coherent answer
4. Adding any important points that ALL models missed

## Original Question

${prompt}

## Reference Responses

${refSection}

## Your Task

Synthesize the best possible answer. Be comprehensive but concise. If the responses agree, be confident. If they disagree, note the disagreement and explain which is more likely correct.`;
    // Step 3: Aggregate with strong model
    try {
        const aggResponse = await client.complete({
            model: aggregatorModel,
            messages: [{ role: 'user', content: aggregationPrompt }],
            max_tokens: AGGREGATOR_MAX_TOKENS,
            stream: false,
        }, ctx.abortSignal);
        const aggText = extractResponseText(aggResponse);
        // An empty synthesis is useless to the caller; take the fallback path below instead.
        if (!aggText)
            throw new Error('aggregator returned an empty response');
        // Build output
        const parts = [];
        parts.push(aggText);
        if (include_reasoning) {
            parts.push('\n\n---\n*Reference responses:*');
            for (const ref of successRefs) {
                parts.push(`\n**${ref.model}:** ${ref.text.slice(0, 500)}${ref.text.length > 500 ? '...' : ''}`);
            }
        }
        // Note which models responded
        const modelList = successRefs.map(r => r.model.split('/').pop()).join(', ');
        const failList = references.filter(r => r.error).map(r => r.model.split('/').pop()).join(', ');
        parts.push(`\n\n*MoA: ${successRefs.length} models (${modelList})${failList ? `, ${failList} failed` : ''} → ${aggregatorModel.split('/').pop()}*`);
        return { output: parts.join('\n') };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return {
            output: `Aggregation failed: ${reason}\n\nBest reference response (${successRefs[0].model}):\n${successRefs[0].text}`,
            isError: true,
        };
    }
}
142
/**
 * Capability definition for the MixtureOfAgents tool: the spec (name,
 * description and JSON input schema) plus the `execute` handler. It is flagged
 * `concurrent: true`.
 */
export const moaCapability = {
    spec: {
        name: 'MixtureOfAgents',
        // One array entry per line of the description; '' entries are the blank lines.
        description: [
            'Query multiple AI models in parallel and synthesize the best answer.',
            '',
            'Use this for complex questions where a single model might miss important perspectives.',
            'Sends the prompt to 4 diverse models, then aggregates with a strong model.',
            '',
            'Parameters:',
            '- prompt (required): The question or task to send to all models',
            '- models (optional): Array of model IDs to use as references (default: 4 diverse free/cheap models)',
            '- aggregator (optional): Model to aggregate responses (default: claude-sonnet-4.6)',
            '- include_reasoning (optional): If true, include reference responses in output',
        ].join('\n'),
        input_schema: {
            type: 'object',
            required: ['prompt'],
            properties: {
                prompt: {
                    type: 'string',
                    description: 'The question or task to send to all models',
                },
                models: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'Override reference models',
                },
                aggregator: {
                    type: 'string',
                    description: 'Override aggregator model',
                },
                include_reasoning: {
                    type: 'boolean',
                    description: 'Include reference responses in output',
                },
            },
        },
    },
    execute,
    concurrent: true,
};
169
/**
 * Register the API URL for MoA tool (called during agent setup).
 *
 * Stores both values in module-level state; `execute` reads them when it
 * constructs its ModelClient.
 *
 * @param {string} apiUrl - API base URL handed to the model client.
 * @param {'base'|'solana'} chain - Chain identifier handed to the model client.
 */
export function registerMoAConfig(apiUrl, chain) {
    [registeredApiUrl, registeredChain] = [apiUrl, chain];
}
package/dist/ui/app.js CHANGED
@@ -363,7 +363,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
363
363
  // Show user message in scrollback so the conversation is readable
364
364
  setCommittedResponses(rs => [...rs, {
365
365
  key: `user-${Date.now()}`,
366
- text: chalk.cyan('❯') + ' ' + trimmed,
366
+ text: chalk.bold.cyan('❯ ') + chalk.bold(trimmed),
367
367
  tokens: { input: 0, output: 0, calls: 0 },
368
368
  cost: 0,
369
369
  }]);
@@ -398,7 +398,11 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
398
398
  // Mouse support — clicks toggle tool results, drag selects text
399
399
  useEffect(() => {
400
400
  const cleanup = mouse.enable();
401
- const handleClick = (_event) => {
401
+ const handleClick = (event) => {
402
+ // Ignore clicks in the input area (bottom 4 rows of the terminal)
403
+ const termRows = process.stdout.rows ?? 24;
404
+ if (event.row >= termRows - 4)
405
+ return;
402
406
  // Click: toggle expandable tool
403
407
  setExpandableTool(prev => prev ? { ...prev, expanded: !prev.expanded } : null);
404
408
  };
@@ -627,7 +631,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
627
631
  : _jsx(Text, { color: "green", children: "\u2713" }), ' ', _jsx(Text, { bold: true, children: tool.name }), tool.preview ? _jsxs(Text, { dimColor: true, children: ["(", tool.preview.slice(0, 80), ")"] }) : null, _jsxs(Text, { dimColor: true, children: [" ", elapsedFmt] })] }), tool.diff && !tool.error && tool.diff.oldLines.length <= 8 && tool.diff.newLines.length <= 8 && (_jsxs(Box, { flexDirection: "column", marginLeft: 2, children: [tool.diff.oldLines.map((line, i) => (_jsxs(Text, { color: "red", wrap: "truncate-end", children: ['⎿ ', "- ", line.slice(0, 120)] }, `old-${i}`))), tool.diff.newLines.map((line, i) => (_jsxs(Text, { color: "green", wrap: "truncate-end", children: ['⎿ ', "+ ", line.slice(0, 120)] }, `new-${i}`)))] })), tool.diff && !tool.error && (tool.diff.oldLines.length > 8 || tool.diff.newLines.length > 8) && (_jsx(Box, { marginLeft: 2, children: _jsxs(Text, { dimColor: true, children: ['⎿ ', tool.diff.oldLines.length, " lines \u2192 ", tool.diff.newLines.length, " lines"] }) })), tool.error && tool.fullOutput && (_jsx(Box, { flexDirection: "column", marginLeft: 2, children: tool.fullOutput.split('\n').filter(Boolean).slice(0, 3).map((line, i) => (_jsxs(Text, { color: "red", wrap: "truncate-end", children: ['⎿ ', line.slice(0, 120)] }, i))) }))] }, tool.key));
628
632
  } }), _jsx(Static, { items: committedResponses, children: (r) => {
629
633
  const isUserMsg = r.key.startsWith('user-');
630
- return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 1, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier && _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] }), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
634
+ return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), isUserMsg && (_jsx(Box, { marginTop: 1 })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 1, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier && _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] }), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
631
635
  ? `${r.tokens.calls} calls`
632
636
  : `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : ''] }) }))] }, r.key));
633
637
  } }), permissionRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 1, children: [_jsx(Text, { color: "yellow", children: " \u256D\u2500 Permission required \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "yellow", children: [" \u2502 ", _jsx(Text, { bold: true, children: permissionRequest.toolName })] }), permissionRequest.description.split('\n').map((line, i) => (_jsxs(Text, { dimColor: true, children: [" \u2502 ", line] }, i))), _jsx(Text, { color: "yellow", children: " \u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsx(Box, { marginLeft: 3, children: _jsxs(Text, { children: [_jsx(Text, { bold: true, color: "green", children: "[y]" }), _jsx(Text, { dimColor: true, children: " yes " }), _jsx(Text, { bold: true, color: "cyan", children: "[a]" }), _jsx(Text, { dimColor: true, children: " always " }), _jsx(Text, { bold: true, color: "red", children: "[n]" }), _jsx(Text, { dimColor: true, children: " no" })] }) })] })), askUserRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 1, children: [_jsx(Text, { color: "cyan", children: " \u256D\u2500 Question \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "cyan", children: [" \u2502 ", _jsx(Text, { bold: true, children: askUserRequest.question })] }), askUserRequest.options && askUserRequest.options.length > 0 && (askUserRequest.options.map((opt, i) => (_jsxs(Text, { dimColor: true, children: [" \u2502 ", i + 1, ". 
", opt] }, i)))), _jsx(Text, { color: "cyan", children: " \u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Box, { marginLeft: 3, children: [_jsx(Text, { bold: true, children: "answer> " }), _jsx(TextInput, { value: askUserInput, onChange: setAskUserInput, onSubmit: (val) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.6.4",
3
+ "version": "3.6.6",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {