@blockrun/franklin 3.6.5 → 3.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/commands.js +4 -1
- package/dist/agent/context.js +67 -3
- package/dist/agent/loop.js +35 -4
- package/dist/agent/types.d.ts +2 -0
- package/dist/agent/verification.d.ts +42 -0
- package/dist/agent/verification.js +206 -0
- package/dist/commands/start.js +4 -1
- package/dist/learnings/extractor.d.ts +5 -0
- package/dist/learnings/extractor.js +118 -2
- package/dist/learnings/index.d.ts +3 -3
- package/dist/learnings/index.js +2 -2
- package/dist/learnings/store.d.ts +11 -1
- package/dist/learnings/store.js +100 -0
- package/dist/learnings/types.d.ts +16 -0
- package/dist/tools/index.js +2 -0
- package/dist/tools/moa.d.ts +16 -0
- package/dist/tools/moa.js +173 -0
- package/dist/ui/app.js +7 -3
- package/package.json +1 -1
package/dist/agent/commands.js
CHANGED
|
@@ -202,7 +202,7 @@ const DIRECT_COMMANDS = {
|
|
|
202
202
|
` **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
|
|
203
203
|
` **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
|
|
204
204
|
` **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /session-search /context /tasks\n` +
|
|
205
|
-
` **Power:** /ultrathink [query] /ultraplan /noplan /dump\n` +
|
|
205
|
+
` **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
|
|
206
206
|
` **Info:** /model /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
|
|
207
207
|
` **UI:** /clear /exit\n` +
|
|
208
208
|
(ultrathinkOn ? `\n Ultrathink: ON\n` : '')
|
|
@@ -536,6 +536,8 @@ const ARG_COMMANDS = [
|
|
|
536
536
|
{ prefix: '/refactor ', rewrite: (a) => `Refactor: ${a}. Read the relevant code first, then make targeted changes. Explain each change.` },
|
|
537
537
|
{ prefix: '/scaffold ', rewrite: (a) => `Create the scaffolding/boilerplate for: ${a}. Generate the file structure and initial code. Ask me if you need clarification on requirements.` },
|
|
538
538
|
{ prefix: '/doc ', rewrite: (a) => `Generate documentation for ${a}. Include: purpose, API/interface description, usage examples, and important notes.` },
|
|
539
|
+
{ prefix: '/moa ', rewrite: (a) => `Use the MixtureOfAgents tool to get a high-quality answer by querying multiple AI models in parallel: ${a}` },
|
|
540
|
+
{ prefix: '/moa', rewrite: () => `Use the MixtureOfAgents tool. Ask me what question I want answered by multiple models.` },
|
|
539
541
|
];
|
|
540
542
|
// ─── Main dispatch ────────────────────────────────────────────────────────
|
|
541
543
|
/**
|
|
@@ -667,6 +669,7 @@ export async function handleSlashCommand(input, ctx) {
|
|
|
667
669
|
else {
|
|
668
670
|
const newModel = resolveModel(input.slice(7).trim());
|
|
669
671
|
ctx.config.model = newModel;
|
|
672
|
+
ctx.config.baseModel = newModel; // Update recovery target so loop doesn't reset
|
|
670
673
|
ctx.config.onModelChange?.(newModel);
|
|
671
674
|
ctx.onEvent({ kind: 'text_delta', text: `Model → **${newModel}**\n` });
|
|
672
675
|
}
|
package/dist/agent/context.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
import fs from 'node:fs';
|
|
6
6
|
import path from 'node:path';
|
|
7
7
|
import { execSync } from 'node:child_process';
|
|
8
|
-
import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt } from '../learnings/store.js';
|
|
8
|
+
import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt, loadSkills, formatSkillsForPrompt } from '../learnings/store.js';
|
|
9
9
|
// ─── System Instructions Assembly ──────────────────────────────────────────
|
|
10
10
|
// Composable prompt sections — each independently maintainable and conditionally includable.
|
|
11
11
|
function getCoreInstructions() {
|
|
@@ -186,10 +186,16 @@ export function assembleInstructions(workingDir, model) {
|
|
|
186
186
|
getTokenEfficiencySection(),
|
|
187
187
|
getVerificationSection(),
|
|
188
188
|
];
|
|
189
|
-
// Read RUNCODE.md or CLAUDE.md from the project
|
|
189
|
+
// Read RUNCODE.md or CLAUDE.md from the project (with injection scanning)
|
|
190
190
|
const projectConfig = readProjectConfig(workingDir);
|
|
191
191
|
if (projectConfig) {
|
|
192
|
-
|
|
192
|
+
const { sanitized, threats } = scanForInjection(projectConfig);
|
|
193
|
+
if (threats.length > 0) {
|
|
194
|
+
parts.push(`# Project Instructions\n\n⚠️ WARNING: ${threats.length} suspicious pattern(s) detected in project config and neutralized.\n\n${sanitized}`);
|
|
195
|
+
}
|
|
196
|
+
else {
|
|
197
|
+
parts.push(`# Project Instructions\n\n${projectConfig}`);
|
|
198
|
+
}
|
|
193
199
|
}
|
|
194
200
|
// Inject environment info
|
|
195
201
|
parts.push(buildEnvironmentSection(workingDir));
|
|
@@ -210,6 +216,18 @@ export function assembleInstructions(workingDir, model) {
|
|
|
210
216
|
}
|
|
211
217
|
}
|
|
212
218
|
catch { /* learnings are optional — never block startup */ }
|
|
219
|
+
// Inject relevant skills (procedural memory from past complex tasks)
|
|
220
|
+
try {
|
|
221
|
+
const allSkills = loadSkills();
|
|
222
|
+
if (allSkills.length > 0) {
|
|
223
|
+
// Skills are matched lazily on first user message — for now inject top skills by use count
|
|
224
|
+
const topSkills = allSkills.sort((a, b) => b.uses - a.uses).slice(0, 5);
|
|
225
|
+
const skillsSection = formatSkillsForPrompt(topSkills);
|
|
226
|
+
if (skillsSection)
|
|
227
|
+
parts.push(skillsSection);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
catch { /* skills are optional */ }
|
|
213
231
|
// Model-specific execution guidance
|
|
214
232
|
if (model) {
|
|
215
233
|
parts.push(getModelGuidance(model));
|
|
@@ -276,6 +294,52 @@ export function invalidateInstructionCache(workingDir) {
|
|
|
276
294
|
_instructionCache.clear();
|
|
277
295
|
}
|
|
278
296
|
}
|
|
297
|
+
// ─── Prompt Injection Detection ────────────────────────────────────────────
|
|
298
|
+
/**
 * Patterns that indicate potential prompt injection in context files.
 * Each entry pairs a detection regex with a short label used in threat reports.
 */
const INJECTION_PATTERNS = [
    // Direct instruction override attempts
    { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, description: 'instruction override' },
    { pattern: /disregard\s+(all\s+)?(previous\s+|above\s+)?rules/i, description: 'rule disregard' },
    { pattern: /forget\s+(everything|all|your)\s+(you|instructions|rules)/i, description: 'memory wipe' },
    { pattern: /you\s+are\s+now\s+(?:a\s+)?(?:different|new|unrestricted)/i, description: 'identity hijack' },
    { pattern: /system\s*:\s*you\s+are/i, description: 'fake system message' },
    // Dangerous command injection
    { pattern: /execute\s+(curl|wget|bash|sh|python|node)\b/i, description: 'command execution' },
    { pattern: /\bcat\s+\/etc\/(passwd|shadow|sudoers)/i, description: 'credential access' },
    { pattern: /\brm\s+-rf\s+[\/~]/i, description: 'destructive command' },
    { pattern: /\beval\s*\(/i, description: 'eval injection' },
    // Data exfiltration
    { pattern: /\bcurl\s+.*\|\s*(bash|sh)/i, description: 'pipe to shell' },
    { pattern: /send\s+(to|via)\s+(http|webhook|url)/i, description: 'data exfiltration' },
    // HTML/comment injection
    { pattern: /<!--[\s\S]*?-->/g, description: 'HTML comment injection' },
];
/** Invisible unicode characters that can hide malicious content. */
const INVISIBLE_UNICODE = /[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF\u00AD]/g;
/**
 * Scan text for prompt injection patterns and invisible unicode.
 *
 * Invisible characters are stripped first, so a phrase split by them is still
 * detected. Every occurrence of every pattern is then wrapped as
 * `[BLOCKED: …]` (visible but defanged).
 *
 * @param {string} text - Raw contents of a project context file.
 * @returns {{ sanitized: string, threats: string[] }} The neutralized text and
 *   one human-readable entry per detection category (not per occurrence).
 */
function scanForInjection(text) {
    const threats = [];
    let sanitized = text;
    // `match` with a /g regex returns every hit (or null) and leaves no
    // `lastIndex` state behind, unlike `.test()` on the shared regex.
    const invisible = sanitized.match(INVISIBLE_UNICODE);
    if (invisible) {
        threats.push(`${invisible.length} invisible unicode character(s) removed`);
        sanitized = sanitized.replace(INVISIBLE_UNICODE, '');
    }
    for (const { pattern, description } of INJECTION_PATTERNS) {
        // Most table entries are not global; without the `g` flag `replace`
        // would neutralize only the first occurrence and leave repeats intact.
        const everyMatch = pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
        const matches = sanitized.match(everyMatch);
        if (matches) {
            threats.push(`${description}: "${matches[0].slice(0, 50)}"`);
            // Neutralize by wrapping in brackets (visible but defanged)
            sanitized = sanitized.replace(everyMatch, (match) => `[BLOCKED: ${match}]`);
        }
    }
    return { sanitized, threats };
}
|
|
279
343
|
// ─── Project Config ────────────────────────────────────────────────────────
|
|
280
344
|
/**
|
|
281
345
|
* Look for RUNCODE.md, then CLAUDE.md in the working directory and parents.
|
package/dist/agent/loop.js
CHANGED
|
@@ -19,6 +19,7 @@ import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
|
19
19
|
import { routeRequest, parseRoutingProfile } from '../router/index.js';
|
|
20
20
|
import { recordOutcome } from '../router/local-elo.js';
|
|
21
21
|
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
22
|
+
import { shouldVerify, runVerification } from './verification.js';
|
|
22
23
|
import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
|
|
23
24
|
/**
|
|
24
25
|
* Atomically replace all elements in a history array.
|
|
@@ -218,7 +219,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
218
219
|
const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
|
|
219
220
|
const history = [];
|
|
220
221
|
let lastUserInput = ''; // For /retry
|
|
221
|
-
|
|
222
|
+
config.baseModel = config.model; // User's intended model — /model command updates this
|
|
222
223
|
let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
|
|
223
224
|
// Track models that failed with 402 (payment required) across turns.
|
|
224
225
|
// These persist until the session ends — unlike transient errors, payment failures
|
|
@@ -294,9 +295,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
294
295
|
// ── Model recovery: try original model at the start of each new turn ──
|
|
295
296
|
// If we fell back to a free model last turn due to a transient error, try original again.
|
|
296
297
|
// But DON'T reset if the original model had a payment failure — it will just fail again.
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
config.
|
|
298
|
+
const baseModel = config.baseModel ?? config.model;
|
|
299
|
+
if (config.model !== baseModel && !paymentFailedModels.has(baseModel)) {
|
|
300
|
+
config.model = baseModel;
|
|
301
|
+
config.onModelChange?.(baseModel);
|
|
300
302
|
}
|
|
301
303
|
turnFailedModels = new Set(); // Fresh slate for transient failures this turn
|
|
302
304
|
const abort = new AbortController();
|
|
@@ -714,6 +716,35 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
714
716
|
});
|
|
715
717
|
}
|
|
716
718
|
}
|
|
719
|
+
// ── Verification gate: run adversarial checks on substantial work ──
|
|
720
|
+
if (shouldVerify(turnToolCalls, turnToolCounts, lastUserInput || '')) {
|
|
721
|
+
try {
|
|
722
|
+
const vResult = await runVerification(history, capabilityMap, client, {
|
|
723
|
+
model: config.model,
|
|
724
|
+
workDir,
|
|
725
|
+
abortSignal: abort.signal,
|
|
726
|
+
onEvent: (e) => { if (e.kind === 'text_delta' && e.text)
|
|
727
|
+
onEvent({ kind: 'text_delta', text: e.text }); },
|
|
728
|
+
});
|
|
729
|
+
if (vResult.verdict === 'FAIL' && vResult.issues.length > 0) {
|
|
730
|
+
// Inject verification feedback — agent will see this and continue fixing
|
|
731
|
+
const feedbackMsg = {
|
|
732
|
+
role: 'user',
|
|
733
|
+
content: `[VERIFICATION FAILED]\n${vResult.summary}\n\nFix the issues above and verify your fixes work.`,
|
|
734
|
+
};
|
|
735
|
+
history.push(feedbackMsg);
|
|
736
|
+
persistSessionMessage(feedbackMsg);
|
|
737
|
+
onEvent({ kind: 'text_delta', text: `\n⚠️ *Verification found issues — fixing...*\n` });
|
|
738
|
+
continue; // Re-enter the loop to fix issues
|
|
739
|
+
}
|
|
740
|
+
if (vResult.verdict === 'PASS') {
|
|
741
|
+
onEvent({ kind: 'text_delta', text: '\n✓ *Verified*\n' });
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
catch {
|
|
745
|
+
// Verification errors never block the main flow
|
|
746
|
+
}
|
|
747
|
+
}
|
|
717
748
|
// Record success for local Elo learning (include tool call count for efficiency)
|
|
718
749
|
if (lastRoutedCategory && lastRoutedModel) {
|
|
719
750
|
recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
|
package/dist/agent/types.d.ts
CHANGED
|
@@ -142,4 +142,6 @@ export interface AgentConfig {
|
|
|
142
142
|
onAskUser?: (question: string, options?: string[]) => Promise<string>;
|
|
143
143
|
/** Notify UI when agent switches model (e.g. payment fallback) */
|
|
144
144
|
onModelChange?: (model: string) => void;
|
|
145
|
+
/** The user's intended model — updated by /model command, used for turn recovery */
|
|
146
|
+
baseModel?: string;
|
|
145
147
|
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification Agent — adversarial testing gate.
|
|
3
|
+
*
|
|
4
|
+
* After the main agent completes substantial work (writes/edits files, runs commands),
|
|
5
|
+
* this agent runs independently to try to BREAK what was built. It can only read and
|
|
6
|
+
* execute — never modify files. Returns PASS/FAIL/PARTIAL verdict.
|
|
7
|
+
*
|
|
8
|
+
* If FAIL: injects feedback into conversation so the main agent can fix issues.
|
|
9
|
+
* If PASS: work is considered verified.
|
|
10
|
+
*
|
|
11
|
+
* Inspired by Claude Code's verification agent architecture.
|
|
12
|
+
*/
|
|
13
|
+
import type { CapabilityHandler, Dialogue } from './types.js';
|
|
14
|
+
import { ModelClient } from './llm.js';
|
|
15
|
+
export interface VerificationResult {
|
|
16
|
+
verdict: 'PASS' | 'FAIL' | 'PARTIAL' | 'SKIPPED';
|
|
17
|
+
summary: string;
|
|
18
|
+
issues: string[];
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Should we run verification for this turn?
|
|
22
|
+
* Only for substantial work: 3+ tool calls AND at least one write/edit/bash.
|
|
23
|
+
*/
|
|
24
|
+
export declare function shouldVerify(turnToolCalls: number, turnToolCounts: Map<string, number>, userInput: string): boolean;
|
|
25
|
+
/**
|
|
26
|
+
* Filter capability handlers to only allow read-only tools.
|
|
27
|
+
* Bash is allowed (for running tests/builds) but Edit/Write are blocked.
|
|
28
|
+
*/
|
|
29
|
+
export declare function getVerificationTools(handlers: Map<string, CapabilityHandler>): Map<string, CapabilityHandler>;
|
|
30
|
+
/**
|
|
31
|
+
* Run the verification agent on the current conversation state.
|
|
32
|
+
* Uses a cheap model to minimize cost. Returns verdict + issues.
|
|
33
|
+
*/
|
|
34
|
+
export declare function runVerification(history: Dialogue[], handlers: Map<string, CapabilityHandler>, client: ModelClient, config: {
|
|
35
|
+
model: string;
|
|
36
|
+
workDir: string;
|
|
37
|
+
abortSignal: AbortSignal;
|
|
38
|
+
onEvent?: (event: {
|
|
39
|
+
kind: string;
|
|
40
|
+
text?: string;
|
|
41
|
+
}) => void;
|
|
42
|
+
}): Promise<VerificationResult>;
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification Agent — adversarial testing gate.
|
|
3
|
+
*
|
|
4
|
+
* After the main agent completes substantial work (writes/edits files, runs commands),
|
|
5
|
+
* this agent runs independently to try to BREAK what was built. It can only read and
|
|
6
|
+
* execute — never modify files. Returns PASS/FAIL/PARTIAL verdict.
|
|
7
|
+
*
|
|
8
|
+
* If FAIL: injects feedback into conversation so the main agent can fix issues.
|
|
9
|
+
* If PASS: work is considered verified.
|
|
10
|
+
*
|
|
11
|
+
* Inspired by Claude Code's verification agent architecture.
|
|
12
|
+
*/
|
|
13
|
+
// ─── Verification System Prompt ───────────────────────────────────────────
|
|
14
|
+
const VERIFICATION_PROMPT = `You are a VERIFICATION agent. Your job is NOT to confirm that code works — it is to TRY TO BREAK IT.
|
|
15
|
+
|
|
16
|
+
## Rules
|
|
17
|
+
|
|
18
|
+
1. **Adversarial mindset**: Assume the code has bugs. Your goal is to find them.
|
|
19
|
+
2. **No modifications**: You may ONLY use Read, Bash, Glob, and Grep tools. You MUST NOT use Edit, Write, or any tool that modifies files.
|
|
20
|
+
3. **Evidence required**: Every check MUST include:
|
|
21
|
+
- What you tested (the exact command or operation)
|
|
22
|
+
- The actual output
|
|
23
|
+
- Whether it PASSED or FAILED
|
|
24
|
+
4. **No rationalization**: These phrases are NEVER acceptable as evidence:
|
|
25
|
+
- "The code looks correct"
|
|
26
|
+
- "This should work"
|
|
27
|
+
- "Based on the implementation, it handles..."
|
|
28
|
+
- "The tests pass" (unless you actually ran them and showed output)
|
|
29
|
+
|
|
30
|
+
## What to Check
|
|
31
|
+
|
|
32
|
+
1. **Does it compile/build?** Run the build command.
|
|
33
|
+
2. **Do tests pass?** Run the test suite.
|
|
34
|
+
3. **Edge cases**: Empty inputs, very large inputs, missing files, invalid data.
|
|
35
|
+
4. **Error handling**: What happens when things go wrong?
|
|
36
|
+
5. **Consistency**: Does the change break other parts of the codebase?
|
|
37
|
+
|
|
38
|
+
## Output Format
|
|
39
|
+
|
|
40
|
+
After running your checks, output a verdict in EXACTLY this format:
|
|
41
|
+
|
|
42
|
+
VERDICT: PASS|FAIL|PARTIAL
|
|
43
|
+
|
|
44
|
+
Then explain:
|
|
45
|
+
- What you tested
|
|
46
|
+
- What passed
|
|
47
|
+
- What failed (if any)
|
|
48
|
+
- Specific issues to fix (if FAIL)
|
|
49
|
+
|
|
50
|
+
Keep it concise — focus on actionable findings, not narration.`;
|
|
51
|
+
// ─── Thresholds ──────────────────────────────────────────────────────────
|
|
52
|
+
/** Tool names that count as "substantial work" for verification purposes. */
const WRITE_TOOLS = new Set(['Edit', 'Write', 'Bash']);
/** Minimum tool calls to trigger verification. */
const MIN_TOOL_CALLS = 3;
/** Maximum tokens to spend on verification (prevent runaway). */
const MAX_VERIFICATION_TOKENS = 8192;
// ─── Decision Logic ──────────────────────────────────────────────────────
/**
 * Decide whether the verification gate should run for the turn just finished.
 *
 * @param {number} turnToolCalls - Number of tool calls made during the turn.
 * @param {Map<string, number>} turnToolCounts - Per-tool call counts; only the keys are inspected.
 * @param {string} userInput - The user message that started the turn.
 * @returns {boolean} true when the turn made at least MIN_TOOL_CALLS calls,
 *   used at least one tool in WRITE_TOOLS, and the input is neither a slash
 *   command nor shorter than 20 characters.
 */
export function shouldVerify(turnToolCalls, turnToolCounts, userInput) {
    // Too little activity to be worth checking.
    if (turnToolCalls < MIN_TOOL_CALLS) {
        return false;
    }
    // A turn that used no write-like tool has nothing to verify.
    const usedWriteTool = Array.from(turnToolCounts, ([toolName]) => toolName)
        .some((toolName) => WRITE_TOOLS.has(toolName));
    if (!usedWriteTool) {
        return false;
    }
    // Slash commands and very short requests are treated as "quick" asks.
    const normalized = userInput.toLowerCase();
    const isSlashCommand = normalized.startsWith('/');
    const isShortRequest = normalized.length < 20;
    return !(isSlashCommand || isShortRequest);
}
|
|
83
|
+
// ─── Read-only tool filter ───────────────────────────────────────────────
|
|
84
|
+
/** Tool names the verification agent is allowed to receive. */
const READ_ONLY_TOOLS = new Set(['Read', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch']);
/**
 * Build the tool set handed to the verification agent.
 *
 * Keeps only handlers whose name is in READ_ONLY_TOOLS, in their original
 * order. Bash stays available (for running tests/builds); Edit and Write are
 * not in the allow-list and are therefore dropped.
 *
 * @param {Map<string, object>} handlers - All registered capability handlers, keyed by tool name.
 * @returns {Map<string, object>} A new map; the input map is not modified.
 */
export function getVerificationTools(handlers) {
    const allowed = [...handlers].filter(([toolName]) => READ_ONLY_TOOLS.has(toolName));
    return new Map(allowed);
}
|
|
98
|
+
// ─── Run Verification ────────────────────────────────────────────────────
|
|
99
|
+
/**
 * Run the verification agent on the current conversation state.
 *
 * Makes one completion call against a fixed free model; `config.model`,
 * `config.workDir` and `config.abortSignal` are accepted but not used here.
 * Tool specs are sent with the request, but only text parts of the reply are
 * read — tool calls in the reply are not executed.
 *
 * @param {Array<object>} history - Conversation so far; summarized by extractRecentWork.
 * @param {Map<string, object>} handlers - All capability handlers; filtered by getVerificationTools.
 * @param {object} client - Model client exposing `complete(request)`.
 * @param {object} config - Options; `onEvent`, if present, receives a "Verifying" text_delta.
 * @returns {Promise<{verdict: string, summary: string, issues: string[]}>}
 *   `verdict` is PASS, FAIL or PARTIAL as parsed from the reply (PARTIAL when
 *   no verdict line is found), or SKIPPED when there is nothing to verify or
 *   the call throws. Never rejects for errors inside the completion call.
 */
export async function runVerification(history, handlers, client, config) {
    const verificationTools = getVerificationTools(handlers);
    // Build verification prompt from recent history context
    const recentWork = extractRecentWork(history);
    if (!recentWork) {
        return { verdict: 'SKIPPED', summary: 'No recent work to verify.', issues: [] };
    }
    const verificationHistory = [
        {
            role: 'user',
            content: `The following work was just completed. Your job is to VERIFY it by running adversarial checks.\n\n${recentWork}\n\nRun build, tests, and edge case checks. Output your VERDICT.`,
        },
    ];
    config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
    // Use cheap model for verification
    const verificationModel = 'nvidia/nemotron-ultra-253b'; // Free model to keep cost zero
    try {
        // Simple single-turn verification call
        const response = await client.complete({
            model: verificationModel,
            system: VERIFICATION_PROMPT,
            messages: verificationHistory,
            tools: Array.from(verificationTools.values()).map(h => h.spec),
            max_tokens: MAX_VERIFICATION_TOKENS,
        });
        // Extract text from response
        let responseText = '';
        if (response.content) {
            for (const part of response.content) {
                if (typeof part === 'string') {
                    responseText += part;
                }
                else if (part.type === 'text') {
                    responseText += part.text;
                }
            }
        }
        // Parse verdict
        const verdictMatch = responseText.match(/VERDICT:\s*(PASS|FAIL|PARTIAL)/i);
        const verdict = verdictMatch
            ? verdictMatch[1].toUpperCase()
            : 'PARTIAL';
        // Extract issues: bullet lines that lead with a failure keyword or mention "fail".
        // Lines are trimmed first so indented (nested) bullets are recognized too.
        const issues = responseText
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => /^[-•*]\s*(FAIL|ERROR|BUG|ISSUE|PROBLEM)/i.test(line) ||
            /^[-•*]\s+.*fail/i.test(line))
            .map((line) => line.replace(/^[-•*]\s*/, '').trim());
        // A FAIL verdict must always carry at least one issue: callers that gate
        // on `issues.length > 0` would otherwise drop the failure silently when
        // the model did not format its findings as matching bullets.
        if (verdict === 'FAIL' && issues.length === 0) {
            issues.push('Verification reported FAIL without itemized issues; see summary.');
        }
        return { verdict, summary: responseText.slice(0, 500), issues };
    }
    catch (err) {
        // Verification failure should never block the main flow
        const reason = err instanceof Error ? err.message : String(err);
        return {
            verdict: 'SKIPPED',
            summary: `Verification error: ${reason}`,
            issues: [],
        };
    }
}
|
|
163
|
+
/**
 * Extract a summary of recent work from the conversation history.
 *
 * Walks backwards from the newest message and stops at the first user message
 * whose content is not an array (a plain prompt rather than tool results), or
 * once at least 10 entries have been gathered (checked between messages).
 * Entries are emitted in chronological order, both across messages and within
 * a single message.
 *
 * @param {Array<{role: string, content: unknown}>} history - Conversation messages, oldest first.
 * @returns {string|null} Entries joined by blank lines — `Assistant: …`
 *   (text, max 500 chars), `Tool: name(input)` (input JSON, max 200 chars),
 *   `Result: …` (max 300 chars) — or null when nothing was found.
 */
function extractRecentWork(history) {
    const parts = [];
    let found = 0;
    for (let i = history.length - 1; i >= 0 && found < 10; i--) {
        const msg = history[i];
        const role = msg.role;
        const isStructured = Array.isArray(msg.content);
        // Stop at a pure user message boundary (not a tool_result user message)
        if (role === 'user' && !isStructured)
            break;
        if (!isStructured)
            continue;
        // Collect this message's entries in their own order, then prepend them
        // as a group; prepending one by one would reverse them within the message.
        const entries = [];
        for (const part of msg.content) {
            if (part === null || typeof part !== 'object')
                continue;
            if (role === 'assistant') {
                if (part.type === 'text' && typeof part.text === 'string' && part.text) {
                    entries.push(`Assistant: ${part.text.slice(0, 500)}`);
                }
                else if (part.type === 'tool_use') {
                    // JSON.stringify(undefined) is undefined — fall back to an empty input.
                    const input = JSON.stringify(part.input) ?? '';
                    entries.push(`Tool: ${part.name}(${input.slice(0, 200)})`);
                }
            }
            else if (role === 'user' && part.type === 'tool_result') {
                const output = typeof part.content === 'string'
                    ? part.content
                    : Array.isArray(part.content)
                        ? part.content.map(c => c?.text || '').join('\n')
                        : '';
                entries.push(`Result: ${output.slice(0, 300)}`);
            }
        }
        parts.unshift(...entries);
        found += entries.length;
    }
    return parts.length > 0 ? parts.join('\n\n') : null;
}
|
package/dist/commands/start.js
CHANGED
|
@@ -130,8 +130,11 @@ export async function startCommand(options) {
|
|
|
130
130
|
}
|
|
131
131
|
}
|
|
132
132
|
}
|
|
133
|
-
// Build capabilities (built-in + MCP + sub-agent)
|
|
133
|
+
// Build capabilities (built-in + MCP + sub-agent + MoA)
|
|
134
134
|
const subAgent = createSubAgentCapability(apiUrl, chain, allCapabilities);
|
|
135
|
+
// Register MoA tool config (needs API URL for parallel model queries)
|
|
136
|
+
const { registerMoAConfig } = await import('../tools/moa.js');
|
|
137
|
+
registerMoAConfig(apiUrl, chain);
|
|
135
138
|
const capabilities = [...allCapabilities, ...mcpTools, subAgent];
|
|
136
139
|
// Validate tool descriptions (self-evolution: detect SearchX-style description bugs)
|
|
137
140
|
if (options.debug) {
|
|
@@ -14,6 +14,11 @@ export declare function bootstrapFromClaudeConfig(client: ModelClient): Promise<
|
|
|
14
14
|
* Runs asynchronously — caller should fire-and-forget.
|
|
15
15
|
*/
|
|
16
16
|
export declare function extractLearnings(history: Dialogue[], sessionId: string, client: ModelClient): Promise<void>;
|
|
17
|
+
/**
|
|
18
|
+
* Try to extract a reusable skill from the recent work.
|
|
19
|
+
* Called from maybeMidSessionExtract when enough tool calls happened.
|
|
20
|
+
*/
|
|
21
|
+
export declare function maybeExtractSkill(history: Dialogue[], turnToolCalls: number, sessionId: string, client: ModelClient): Promise<void>;
|
|
17
22
|
/**
|
|
18
23
|
* Check if mid-session extraction should run, and if so, run it in background.
|
|
19
24
|
* Called from the agent loop after tool execution completes.
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
import fs from 'node:fs';
|
|
6
6
|
import path from 'node:path';
|
|
7
7
|
import os from 'node:os';
|
|
8
|
-
import { loadLearnings, mergeLearning, saveLearnings } from './store.js';
|
|
8
|
+
import { loadLearnings, mergeLearning, saveLearnings, loadSkills, saveSkill } from './store.js';
|
|
9
9
|
// Free models for learning extraction — JSON extraction is simple enough.
|
|
10
10
|
// Ordered by reliability: try the best free model first, fall back to others.
|
|
11
11
|
const EXTRACTION_MODELS = [
|
|
@@ -242,6 +242,120 @@ async function runExtraction(condensed, sessionId, client) {
|
|
|
242
242
|
}
|
|
243
243
|
saveLearnings(existing);
|
|
244
244
|
}
|
|
245
|
+
// ─── Skill extraction (procedural memory) ─────────────────────────────────
|
|
246
|
+
// After complex tasks, detect reusable procedures and save as skills.
|
|
247
|
+
const SKILL_EXTRACTION_PROMPT = `You are analyzing a conversation where an AI agent completed a complex multi-step task. Decide if this task pattern should be saved as a reusable skill (procedure).
|
|
248
|
+
|
|
249
|
+
Save a skill when:
|
|
250
|
+
1. The task involved 5+ distinct steps that could be repeated
|
|
251
|
+
2. The steps are generalizable (not one-off fixes for specific bugs)
|
|
252
|
+
3. Future similar tasks would benefit from having the procedure documented
|
|
253
|
+
|
|
254
|
+
If the task IS worth saving, output in this exact format (no markdown fences):
|
|
255
|
+
{"skill":{"name":"kebab-case-name","description":"One-line description","triggers":["keyword1","keyword2"],"steps":"## Steps\\n1. First step\\n2. Second step\\n..."}}
|
|
256
|
+
|
|
257
|
+
If NOT worth saving, output exactly:
|
|
258
|
+
{"skill":null}
|
|
259
|
+
|
|
260
|
+
Be selective — only save genuinely reusable multi-step procedures.`;
|
|
261
|
+
const MIN_TOOL_CALLS_FOR_SKILL = 5;
|
|
262
|
+
/**
 * Condense the last 20 messages into a compact transcript for skill extraction.
 * Stops adding entries once roughly 6000 characters have been collected.
 * Malformed parts (no string `text`, no `input`) are tolerated rather than thrown on.
 */
function condenseForSkillExtraction(history) {
    const CAP = 6000;
    const parts = [];
    let chars = 0;
    const add = (line) => {
        parts.push(line);
        chars += line.length;
    };
    for (const msg of history.slice(-20)) {
        if (chars >= CAP)
            break;
        if (typeof msg.content === 'string') {
            add(`${msg.role}: ${msg.content.slice(0, 300)}`);
            continue;
        }
        if (!Array.isArray(msg.content))
            continue;
        for (const p of msg.content) {
            if (chars >= CAP)
                break;
            if (p === null || typeof p !== 'object')
                continue;
            if (p.type === 'text') {
                if (typeof p.text === 'string') {
                    add(`${msg.role}: ${p.text.slice(0, 200)}`);
                }
            }
            else if (p.type === 'tool_use') {
                const input = JSON.stringify(p.input) ?? '';
                add(`tool: ${p.name}(${input.slice(0, 150)})`);
            }
            else if (p.type === 'tool_result') {
                // Tool results may be a plain string or an array of text parts.
                const text = typeof p.content === 'string'
                    ? p.content
                    : Array.isArray(p.content)
                        ? p.content.map((c) => c?.text || '').join('\n')
                        : '';
                add(`result: ${text.slice(0, 100)}`);
            }
        }
    }
    return parts.join('\n\n');
}
/**
 * Try to extract a reusable skill from the recent work.
 *
 * Best-effort: every failure (model errors, malformed JSON, malformed history,
 * store errors) is swallowed and the function resolves without saving.
 * Tries each model in EXTRACTION_MODELS in order until one answers.
 *
 * NOTE(review): the visible caller in maybeMidSessionExtract passes
 * `totalToolCalls` here — confirm whether a per-turn or cumulative count is intended.
 *
 * @param {Array<object>} history - Conversation messages; only the last 20 are condensed.
 * @param {number} turnToolCalls - Tool-call count; extraction is skipped below MIN_TOOL_CALLS_FOR_SKILL.
 * @param {string} sessionId - Stored on the saved skill as `source_session`.
 * @param {object} client - Model client exposing `complete(request)`.
 * @returns {Promise<void>}
 */
export async function maybeExtractSkill(history, turnToolCalls, sessionId, client) {
    if (turnToolCalls < MIN_TOOL_CALLS_FOR_SKILL)
        return;
    try {
        // Condense recent history with tool details (skills need tool context).
        // Done inside the try so malformed history cannot break the best-effort contract.
        const condensed = condenseForSkillExtraction(history);
        if (condensed.length < 200)
            return;
        let text = '';
        for (const model of EXTRACTION_MODELS) {
            try {
                const response = await client.complete({
                    model,
                    messages: [{ role: 'user', content: condensed }],
                    system: SKILL_EXTRACTION_PROMPT,
                    max_tokens: 1500,
                    temperature: 0.2,
                });
                text = response.content
                    .filter((p) => p.type === 'text')
                    .map((p) => p.text)
                    .join('');
                break;
            }
            catch {
                // This model failed — fall through to the next one in the list.
                continue;
            }
        }
        if (!text)
            return;
        // Parse JSON: take the outermost {...} span so stray prose around it is ignored.
        const start = text.indexOf('{');
        const end = text.lastIndexOf('}');
        if (start === -1 || end <= start)
            return;
        const parsed = JSON.parse(text.slice(start, end + 1));
        if (!parsed.skill)
            return;
        const { name, description, triggers, steps } = parsed.skill;
        // Model output is untrusted: require non-empty strings before persisting.
        const isText = (value) => typeof value === 'string' && value.length > 0;
        if (!isText(name) || !isText(description) || !isText(steps))
            return;
        // Check for duplicate skills
        const existing = loadSkills();
        if (existing.some(s => s.name === name))
            return;
        saveSkill({
            name,
            description,
            triggers: Array.isArray(triggers) ? triggers.filter((t) => typeof t === 'string') : [],
            steps,
            created: new Date().toISOString().split('T')[0],
            uses: 0,
            source_session: sessionId,
        });
    }
    catch {
        // Skill extraction is best-effort
    }
}
|
|
245
359
|
const midSessionState = {
|
|
246
360
|
lastExtractionTokens: 0,
|
|
247
361
|
lastExtractionToolCalls: 0,
|
|
@@ -289,7 +403,9 @@ export function maybeMidSessionExtract(history, estimatedTokens, totalToolCalls,
|
|
|
289
403
|
const condensed = condenseHistory(history);
|
|
290
404
|
if (condensed.length < 100)
|
|
291
405
|
return;
|
|
292
|
-
// Run in background — errors are silently swallowed
|
|
406
|
+
// Run learnings + skill extraction in background — errors are silently swallowed
|
|
293
407
|
runExtraction(condensed, `${sessionId}:mid-${midSessionState.extractionCount}`, client)
|
|
294
408
|
.catch(() => { });
|
|
409
|
+
maybeExtractSkill(history, totalToolCalls, sessionId, client)
|
|
410
|
+
.catch(() => { });
|
|
295
411
|
}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export type { Learning, LearningCategory, ExtractionResult } from './types.js';
|
|
2
|
-
export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt } from './store.js';
|
|
3
|
-
export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract } from './extractor.js';
|
|
1
|
+
export type { Learning, LearningCategory, ExtractionResult, Skill } from './types.js';
|
|
2
|
+
export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt, loadSkills, saveSkill, matchSkills, formatSkillsForPrompt } from './store.js';
|
|
3
|
+
export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract, maybeExtractSkill } from './extractor.js';
|
package/dist/learnings/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt } from './store.js';
|
|
2
|
-
export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract } from './extractor.js';
|
|
1
|
+
export { loadLearnings, saveLearnings, mergeLearning, decayLearnings, formatForPrompt, loadSkills, saveSkill, matchSkills, formatSkillsForPrompt } from './store.js';
|
|
2
|
+
export { extractLearnings, bootstrapFromClaudeConfig, maybeMidSessionExtract, maybeExtractSkill } from './extractor.js';
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Persistence layer for per-user learnings.
|
|
3
3
|
* Stored as JSONL at ~/.blockrun/learnings.jsonl.
|
|
4
4
|
*/
|
|
5
|
-
import type { Learning, LearningCategory } from './types.js';
|
|
5
|
+
import type { Learning, LearningCategory, Skill } from './types.js';
|
|
6
6
|
export declare function loadLearnings(): Learning[];
|
|
7
7
|
export declare function saveLearnings(learnings: Learning[]): void;
|
|
8
8
|
export declare function mergeLearning(existing: Learning[], newEntry: {
|
|
@@ -13,3 +13,13 @@ export declare function mergeLearning(existing: Learning[], newEntry: {
|
|
|
13
13
|
}): Learning[];
|
|
14
14
|
export declare function decayLearnings(learnings: Learning[]): Learning[];
|
|
15
15
|
export declare function formatForPrompt(learnings: Learning[]): string;
|
|
16
|
+
/**
 * Read every skill file in the skills directory.
 * Files that cannot be read or parsed are left out of the result.
 */
export declare function loadSkills(): Skill[];
/**
 * Persist a skill as a markdown file with a front-matter header.
 * The file name is derived from `skill.name`, so saving a skill whose name
 * maps to an existing file replaces that file.
 */
export declare function saveSkill(skill: Skill): void;
/**
 * Rewrite the `uses:` line of the skill's file to `skill.uses + 1`.
 * I/O failures are ignored; the passed-in object is not modified.
 */
export declare function bumpSkillUse(skill: Skill): void;
/**
 * Rank skills against a user message using case-insensitive substring
 * matches on triggers and the skill name, plus a small usage bonus.
 * Returns the best-scoring skills first, capped at a small fixed count.
 */
export declare function matchSkills(input: string, skills: Skill[]): Skill[];
/**
 * Render skills as a markdown section for the system prompt.
 * Returns an empty string when `skills` is empty; long step bodies are truncated.
 */
export declare function formatSkillsForPrompt(skills: Skill[]): string;
|
package/dist/learnings/store.js
CHANGED
|
@@ -157,3 +157,103 @@ export function formatForPrompt(learnings) {
|
|
|
157
157
|
return '';
|
|
158
158
|
return '# Personal Context\nLearned from previous sessions:\n\n' + sections.join('\n\n');
|
|
159
159
|
}
|
|
160
|
+
// ─── Skills (procedural memory) ──────────────────────────────────────────
// One markdown file per skill, kept in the `skills` folder of the BlockRun
// data directory (~/.blockrun/skills/). Skills are bigger than learnings, so
// they are only injected into the prompt when their triggers match.

/** Directory that holds the per-skill markdown files. */
const SKILLS_DIR = path.join(BLOCKRUN_DIR, 'skills');

/** Upper bound on how many matched skills are injected into one prompt. */
const MAX_SKILLS_IN_PROMPT = 5;

/** Upper bound on the characters of a skill's steps shown in the prompt. */
const MAX_SKILL_CHARS = 1500;
|
|
166
|
+
/**
 * Make sure the skills directory exists, creating it (and any missing
 * parents) when it is absent. Does nothing if the path is already there.
 */
function ensureSkillsDir() {
    const alreadyThere = fs.existsSync(SKILLS_DIR);
    if (alreadyThere) {
        return;
    }
    fs.mkdirSync(SKILLS_DIR, { recursive: true });
}
|
|
171
|
+
/**
 * Load all skills from disk.
 *
 * Reads every `*.md` file in the skills directory and parses it with
 * `parseSkillFile`. Loading is best-effort: an unreadable or unparsable file
 * is skipped, and if the directory cannot be created or listed the function
 * returns whatever it has collected (normally an empty array) instead of
 * throwing.
 *
 * @returns {Array<object>} Parsed skills, in file-name order.
 */
export function loadSkills() {
    const skills = [];
    try {
        // Inside the try on purpose: a read-only or otherwise unusable home
        // directory must not turn an optional feature into a hard failure.
        ensureSkillsDir();
        const files = fs
            .readdirSync(SKILLS_DIR)
            .filter((f) => f.endsWith('.md'))
            // readdir order is platform-dependent; sort so results are stable.
            .sort();
        for (const file of files) {
            try {
                const raw = fs.readFileSync(path.join(SKILLS_DIR, file), 'utf-8');
                const skill = parseSkillFile(raw);
                if (skill)
                    skills.push(skill);
            }
            catch { /* skip corrupt or unreadable file */ }
        }
    }
    catch { /* directory missing or not accessible */ }
    return skills;
}
|
|
189
|
+
/**
 * Parse the text of a skill file into a skill object.
 *
 * Expected layout: a front-matter block delimited by `---` lines containing
 * `key: value` pairs (name, description, triggers, created, uses,
 * source_session), followed by the steps as free-form text.
 *
 * Accepts LF and CRLF line endings, an optional leading BOM, and a file that
 * ends directly after the closing `---` (empty steps).
 *
 * @param {string} raw - Full file contents.
 * @returns {object|null} The parsed skill, or null when the front matter is
 *   missing or has no name.
 */
function parseSkillFile(raw) {
    const m = raw.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
    if (!m)
        return null;
    const fm = m[1];
    // `[ \t]*` rather than `\s*` after the colon: `\s` also matches newlines,
    // which made an empty value swallow the following line as its content.
    const name = fm.match(/^name:[ \t]*(.+)$/m)?.[1]?.trim() || '';
    if (!name)
        return null;
    const description = fm.match(/^description:[ \t]*(.+)$/m)?.[1]?.trim() || '';
    const triggersRaw = fm.match(/^triggers:[ \t]*\[([^\]]*)\]/m)?.[1] || '';
    const triggers = triggersRaw.split(',').map((t) => t.trim()).filter(Boolean);
    const created = fm.match(/^created:[ \t]*(.+)$/m)?.[1]?.trim() || '';
    const uses = Number.parseInt(fm.match(/^uses:[ \t]*(\d+)[ \t]*$/m)?.[1] || '0', 10);
    const source = fm.match(/^source_session:[ \t]*(.+)$/m)?.[1]?.trim() || '';
    const steps = (m[2] ?? '').trim();
    return { name, description, triggers, steps, created, uses, source_session: source };
}
|
|
205
|
+
/**
 * Save a skill to disk as `<slug>.md` in the skills directory.
 *
 * The file is a front-matter header (`key: value` lines between `---`
 * delimiters) followed by the steps. The header is read back line by line,
 * so every header value is flattened to a single line before writing, and
 * triggers are stripped of the `,` and `]` characters that would otherwise
 * break the `triggers: [a, b]` list syntax.
 *
 * Skill data may come from unvalidated model output, so `triggers` that is
 * not an array is treated as empty and `steps` given as an array is joined
 * one step per line.
 *
 * @param {object} skill - Skill to persist (name, description, triggers,
 *   steps, created, uses, source_session).
 * @returns {void}
 */
export function saveSkill(skill) {
    ensureSkillsDir();
    // Collapse line breaks (and the whitespace around them) into one space.
    const oneLine = (value) => String(value ?? '').replace(/\s*[\r\n]+\s*/g, ' ').trim();
    const name = oneLine(skill.name);
    const filename = name.replace(/[^a-z0-9-]/gi, '-').toLowerCase() + '.md';
    const triggers = (Array.isArray(skill.triggers) ? skill.triggers : [])
        .map((t) => oneLine(t).replace(/[,\]]/g, ' ').replace(/ {2,}/g, ' ').trim())
        .filter(Boolean);
    // The reader only accepts a non-negative integer on the `uses:` line.
    const uses = Number.isInteger(skill.uses) && skill.uses >= 0 ? skill.uses : 0;
    const steps = Array.isArray(skill.steps) ? skill.steps.join('\n') : String(skill.steps ?? '');
    const fm = [
        '---',
        `name: ${name}`,
        `description: ${oneLine(skill.description)}`,
        `triggers: [${triggers.join(', ')}]`,
        `created: ${oneLine(skill.created)}`,
        `uses: ${uses}`,
        `source_session: ${oneLine(skill.source_session)}`,
        '---',
    ].join('\n');
    fs.writeFileSync(path.join(SKILLS_DIR, filename), `${fm}\n${steps}\n`);
}
|
|
221
|
+
/**
 * Record one more use of a skill by rewriting the `uses:` line of its file
 * to `skill.uses + 1`. The file is located from the skill's name, using the
 * same slug rule as saving. Read/write failures are ignored because the
 * counter is not critical; the `skill` object itself is left untouched.
 *
 * @param {object} skill - Skill whose on-disk counter should be bumped.
 * @returns {void}
 */
export function bumpSkillUse(skill) {
    const slug = skill.name.replace(/[^a-z0-9-]/gi, '-').toLowerCase();
    const target = path.join(SKILLS_DIR, slug + '.md');
    try {
        const current = fs.readFileSync(target, 'utf-8');
        const updated = current.replace(/^uses:\s*\d+$/m, `uses: ${skill.uses + 1}`);
        fs.writeFileSync(target, updated);
    }
    catch {
        // non-critical
    }
}
|
|
231
|
+
/**
 * Find skills relevant to a user message, by trigger matching.
 *
 * Scoring (case-insensitive substring tests against `input`):
 *   - +2 for every trigger found in the message,
 *   - +3 when the skill's name appears in the message,
 *   - plus a usage bonus of 0.5 per recorded use, capped at 3.
 *
 * The usage bonus only ranks skills that already matched. A skill with no
 * trigger or name hit is never returned, however often it has been used.
 * Blank or non-string triggers and names are ignored, since an empty string
 * is a substring of every message.
 *
 * @param {string} input - The user message.
 * @param {Array<object>} skills - Candidate skills.
 * @param {number} [limit] - Maximum number of skills to return; defaults to
 *   the module-wide prompt limit.
 * @returns {Array<object>} Matching skills, best score first.
 */
export function matchSkills(input, skills, limit = MAX_SKILLS_IN_PROMPT) {
    const haystack = String(input ?? '').toLowerCase();
    const isUsable = (value) => typeof value === 'string' && value.trim() !== '';
    const scored = [];
    for (const skill of skills) {
        let relevance = 0;
        const triggers = Array.isArray(skill.triggers) ? skill.triggers : [];
        for (const trigger of triggers) {
            if (isUsable(trigger) && haystack.includes(trigger.toLowerCase()))
                relevance += 2;
        }
        if (isUsable(skill.name) && haystack.includes(skill.name.toLowerCase()))
            relevance += 3;
        if (relevance === 0)
            continue; // no textual match: usage alone must not qualify a skill
        const uses = Number.isFinite(skill.uses) ? Math.max(skill.uses, 0) : 0;
        scored.push({ skill, score: relevance + Math.min(uses * 0.5, 3) });
    }
    return scored
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((entry) => entry.skill);
}
|
|
249
|
+
/**
 * Format matched skills for system prompt injection.
 *
 * Produces a "# Learned Skills" markdown section with one "## name" entry
 * per skill: the description in italics followed by the steps. Steps longer
 * than `maxChars` are cut and marked with an ellipsis line. The cut never
 * lands inside a surrogate pair, so the prompt cannot end up with a lone
 * surrogate.
 *
 * @param {Array<object>} skills - Skills to render.
 * @param {number} [maxChars] - Maximum characters of steps shown per skill;
 *   defaults to the module-wide skill size limit.
 * @returns {string} The markdown section, or '' when `skills` is empty.
 */
export function formatSkillsForPrompt(skills, maxChars = MAX_SKILL_CHARS) {
    if (skills.length === 0)
        return '';
    const parts = ['# Learned Skills\nProcedures from previous experience — use when relevant:\n'];
    for (const s of skills) {
        const steps = String(s.steps ?? '');
        let body = steps;
        if (steps.length > maxChars) {
            let cut = maxChars;
            const lastUnit = steps.charCodeAt(cut - 1);
            // A high surrogate at the cut point has lost its partner: drop it.
            if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
                cut -= 1;
            body = steps.slice(0, cut) + '\n…';
        }
        parts.push(`## ${s.name}\n*${s.description}*\n\n${body}`);
    }
    return parts.join('\n\n');
}
|
|
@@ -21,4 +21,20 @@ export interface ExtractionResult {
|
|
|
21
21
|
category: LearningCategory;
|
|
22
22
|
confidence: number;
|
|
23
23
|
}>;
|
|
24
|
+
/** Procedural skills extracted from complex task patterns. */
|
|
25
|
+
skills?: Array<{
|
|
26
|
+
name: string;
|
|
27
|
+
description: string;
|
|
28
|
+
triggers: string[];
|
|
29
|
+
steps: string;
|
|
30
|
+
}>;
|
|
31
|
+
}
|
|
32
|
+
/**
 * A reusable procedure stored as one markdown file: the scalar fields form
 * the file's front matter and `steps` is the body.
 */
export interface Skill {
    /** Display name; also the basis of the file name on disk. */
    name: string;
    /** One-line summary, shown in italics under the name in the prompt. */
    description: string;
    /** Phrases looked up (case-insensitively) in the user message. */
    triggers: string[];
    /** The procedure itself, as free-form text. */
    steps: string;
    /** Creation date string (the extractor writes `YYYY-MM-DD`). */
    created: string;
    /** Use counter; feeds the ranking bonus when matching. */
    uses: number;
    /** Identifier of the session the skill was extracted from. */
    source_session: string;
}
|
package/dist/tools/index.js
CHANGED
|
@@ -15,6 +15,7 @@ import { askUserCapability } from './askuser.js';
|
|
|
15
15
|
import { tradingSignalCapability, tradingMarketCapability } from './trading.js';
|
|
16
16
|
import { searchXCapability } from './searchx.js';
|
|
17
17
|
import { postToXCapability } from './posttox.js';
|
|
18
|
+
import { moaCapability } from './moa.js';
|
|
18
19
|
/** All capabilities available to the Franklin agent (excluding sub-agent, which needs config). */
|
|
19
20
|
export const allCapabilities = [
|
|
20
21
|
readCapability,
|
|
@@ -32,6 +33,7 @@ export const allCapabilities = [
|
|
|
32
33
|
tradingMarketCapability,
|
|
33
34
|
searchXCapability,
|
|
34
35
|
postToXCapability,
|
|
36
|
+
moaCapability,
|
|
35
37
|
];
|
|
36
38
|
export { readCapability, writeCapability, editCapability, bashCapability, globCapability, grepCapability, webFetchCapability, webSearchCapability, taskCapability, };
|
|
37
39
|
export { createSubAgentCapability } from './subagent.js';
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mixture-of-Agents (MoA) — query multiple models in parallel, aggregate with a strong model.
|
|
3
|
+
*
|
|
4
|
+
* How it works:
|
|
5
|
+
* 1. Send the same prompt to N reference models (cheap/free) in parallel
|
|
6
|
+
* 2. Collect all responses
|
|
7
|
+
* 3. Send all responses + the original prompt to a strong aggregator model
|
|
8
|
+
* 4. Aggregator synthesizes the best answer from all references
|
|
9
|
+
*
|
|
10
|
+
* This produces higher-quality answers than any single model for complex questions.
|
|
11
|
+
* Inspired by the Mixture-of-Agents architecture from Together.ai research.
|
|
12
|
+
*/
|
|
13
|
+
import type { CapabilityHandler } from '../agent/types.js';
|
|
14
|
+
/**
 * The `MixtureOfAgents` tool: sends one prompt to several reference models
 * in parallel and has an aggregator model synthesize a single answer.
 */
export declare const moaCapability: CapabilityHandler;
/**
 * Store the API URL and chain that the MoA tool uses when it creates its
 * model client. Called during agent setup, before the tool runs.
 */
export declare function registerMoAConfig(apiUrl: string, chain: 'base' | 'solana'): void;
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mixture-of-Agents (MoA) — query multiple models in parallel, aggregate with a strong model.
|
|
3
|
+
*
|
|
4
|
+
* How it works:
|
|
5
|
+
* 1. Send the same prompt to N reference models (cheap/free) in parallel
|
|
6
|
+
* 2. Collect all responses
|
|
7
|
+
* 3. Send all responses + the original prompt to a strong aggregator model
|
|
8
|
+
* 4. Aggregator synthesizes the best answer from all references
|
|
9
|
+
*
|
|
10
|
+
* This produces higher-quality answers than any single model for complex questions.
|
|
11
|
+
* Inspired by the Mixture-of-Agents architecture from Together.ai research.
|
|
12
|
+
*/
|
|
13
|
+
import { ModelClient } from '../agent/llm.js';
|
|
14
|
+
// ─── Configuration ────────────────────────────────────────────────────────

/**
 * Default reference models: a deliberately mixed set of cheap or free models
 * that each answer the prompt independently.
 */
const REFERENCE_MODELS = [
    'nvidia/nemotron-ultra-253b', // free; strong at reasoning
    'nvidia/qwen3-coder-480b', // free; strong at coding
    'google/gemini-2.5-flash', // fast and cheap
    'deepseek/deepseek-chat', // cheap; good at reasoning
];

/** Default aggregator: the strong model that merges the reference answers. */
const AGGREGATOR_MODEL = 'anthropic/claude-sonnet-4.6';

/** Token budget for each reference answer. */
const REFERENCE_MAX_TOKENS = 4096;

/** Token budget for the aggregated answer. */
const AGGREGATOR_MAX_TOKENS = 8192;

/** How long a single reference call may run before it is aborted (ms). */
const REFERENCE_TIMEOUT_MS = 60 * 1000;

// ─── Implementation ──────────────────────────────────────────────────────

// Connection settings; filled in by registerMoAConfig() at registration time.
let registeredApiUrl = '';
let registeredChain = 'base';
|
|
34
|
+
/** Join the text parts of a model response into one trimmed string. */
function collectResponseText(response) {
    let text = '';
    for (const part of response?.content ?? []) {
        if (typeof part === 'string')
            text += part;
        else if (part?.type === 'text')
            text += part.text ?? '';
    }
    return text.trim();
}

/** Turn any thrown value into a printable message. */
function describeError(err) {
    return err?.message ?? String(err);
}

/**
 * Run one Mixture-of-Agents query.
 *
 * 1. Sends `prompt` to every reference model in parallel. Each call is
 *    aborted after REFERENCE_TIMEOUT_MS, or as soon as the caller's
 *    `ctx.abortSignal` fires.
 * 2. Keeps the references that produced text; an empty reply counts as a
 *    failure ("empty response").
 * 3. Asks the aggregator model to synthesize one answer from them.
 *
 * @param {object} input - Tool input: `prompt` (required), optional `models`
 *   (array of model IDs; missing or empty means the defaults), optional
 *   `aggregator` model ID, optional `include_reasoning` flag.
 * @param {object} ctx - Tool context. `onProgress` (optional callback) and
 *   `abortSignal` (optional; assumed to be a standard AbortSignal) are used.
 * @returns {Promise<{output: string, isError?: boolean}>} The aggregated
 *   answer with a footer naming the models used. Returns `isError: true` when
 *   the prompt is missing, when every reference fails, or when aggregation
 *   fails or returns no text (the first successful reference answer is
 *   included in that last case).
 */
async function execute(input, ctx) {
    const { prompt, models, aggregator, include_reasoning } = input;
    if (!prompt) {
        return { output: 'Error: prompt is required', isError: true };
    }
    // An empty or malformed override would leave nothing to query.
    const referenceModels = Array.isArray(models) && models.length > 0 ? models : REFERENCE_MODELS;
    const aggregatorModel = aggregator || AGGREGATOR_MODEL;
    const client = new ModelClient({
        apiUrl: registeredApiUrl,
        chain: registeredChain,
    });
    const parentSignal = ctx.abortSignal;
    ctx.onProgress?.('Querying reference models...');
    // Step 1: Query all reference models in parallel
    const references = await Promise.all(referenceModels.map(async (model) => {
        const controller = new AbortController();
        // Cancel this call when the caller cancels the whole tool run.
        const abortFromParent = () => controller.abort();
        if (parentSignal?.aborted)
            controller.abort();
        else
            parentSignal?.addEventListener?.('abort', abortFromParent, { once: true });
        const timer = setTimeout(() => controller.abort(), REFERENCE_TIMEOUT_MS);
        try {
            const response = await client.complete({
                model,
                messages: [{ role: 'user', content: prompt }],
                max_tokens: REFERENCE_MAX_TOKENS,
                stream: false,
            }, controller.signal);
            const text = collectResponseText(response);
            return { model, text, error: text ? null : 'empty response' };
        }
        catch (err) {
            return { model, text: '', error: describeError(err) };
        }
        finally {
            clearTimeout(timer);
            parentSignal?.removeEventListener?.('abort', abortFromParent);
        }
    }));
    // Filter out failures
    const successRefs = references.filter((r) => r.text && !r.error);
    if (successRefs.length === 0) {
        const errors = references.map((r) => `${r.model}: ${r.error}`).join('\n');
        return { output: `All reference models failed:\n${errors}`, isError: true };
    }
    ctx.onProgress?.(`${successRefs.length}/${referenceModels.length} responded, aggregating...`);
    // Step 2: Build aggregation prompt
    const refSection = successRefs.map((r, i) => `## Response ${i + 1} (${r.model})\n\n${r.text}`).join('\n\n---\n\n');
    const aggregationPrompt = `You have been given ${successRefs.length} responses to the same question from different AI models. Your job is to synthesize the BEST possible answer by:

1. Identifying the strongest insights from each response
2. Resolving any contradictions (prefer verifiable facts)
3. Combining the best parts into a single, coherent answer
4. Adding any important points that ALL models missed

## Original Question

${prompt}

## Reference Responses

${refSection}

## Your Task

Synthesize the best possible answer. Be comprehensive but concise. If the responses agree, be confident. If they disagree, note the disagreement and explain which is more likely correct.`;
    // Step 3: Aggregate with strong model
    try {
        const aggResponse = await client.complete({
            model: aggregatorModel,
            messages: [{ role: 'user', content: aggregationPrompt }],
            max_tokens: AGGREGATOR_MAX_TOKENS,
            stream: false,
        }, parentSignal);
        const aggText = collectResponseText(aggResponse);
        if (!aggText) {
            // Route to the fallback below instead of returning a bare footer.
            throw new Error('aggregator returned no text');
        }
        // Build output
        const parts = [aggText];
        if (include_reasoning) {
            parts.push('\n\n---\n*Reference responses:*');
            for (const ref of successRefs) {
                parts.push(`\n**${ref.model}:** ${ref.text.slice(0, 500)}${ref.text.length > 500 ? '...' : ''}`);
            }
        }
        // Note which models responded
        const shortName = (id) => String(id).split('/').pop();
        const modelList = successRefs.map((r) => shortName(r.model)).join(', ');
        const failList = references.filter((r) => r.error).map((r) => shortName(r.model)).join(', ');
        parts.push(`\n\n*MoA: ${successRefs.length} models (${modelList})${failList ? `, ${failList} failed` : ''} → ${shortName(aggregatorModel)}*`);
        return { output: parts.join('\n') };
    }
    catch (err) {
        return {
            output: `Aggregation failed: ${describeError(err)}\n\nBest reference response (${successRefs[0].model}):\n${successRefs[0].text}`,
            isError: true,
        };
    }
}
|
|
142
|
+
/** Tool description shown to the model when it decides whether to call MoA. */
const MOA_DESCRIPTION = `Query multiple AI models in parallel and synthesize the best answer.

Use this for complex questions where a single model might miss important perspectives.
Sends the prompt to 4 diverse models, then aggregates with a strong model.

Parameters:
- prompt (required): The question or task to send to all models
- models (optional): Array of model IDs to use as references (default: 4 diverse free/cheap models)
- aggregator (optional): Model to aggregate responses (default: claude-sonnet-4.6)
- include_reasoning (optional): If true, include reference responses in output`;

/** JSON schema of the tool input; only `prompt` is mandatory. */
const MOA_INPUT_SCHEMA = {
    type: 'object',
    required: ['prompt'],
    properties: {
        prompt: { type: 'string', description: 'The question or task to send to all models' },
        models: { type: 'array', items: { type: 'string' }, description: 'Override reference models' },
        aggregator: { type: 'string', description: 'Override aggregator model' },
        include_reasoning: { type: 'boolean', description: 'Include reference responses in output' },
    },
};

/**
 * Capability definition for the `MixtureOfAgents` tool: its spec (name,
 * description, input schema), the handler that runs it, and the flag that
 * marks it as `concurrent`.
 */
export const moaCapability = {
    spec: {
        name: 'MixtureOfAgents',
        description: MOA_DESCRIPTION,
        input_schema: MOA_INPUT_SCHEMA,
    },
    execute,
    concurrent: true,
};
|
|
169
|
+
/**
 * Remember the connection settings for the MoA tool (called during agent
 * setup). Every later tool run builds its model client from these values.
 *
 * @param {string} apiUrl - API endpoint handed to the model client.
 * @param {'base'|'solana'} chain - Chain handed to the model client.
 * @returns {void}
 */
export function registerMoAConfig(apiUrl, chain) {
    [registeredApiUrl, registeredChain] = [apiUrl, chain];
}
|
package/dist/ui/app.js
CHANGED
|
@@ -363,7 +363,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
363
363
|
// Show user message in scrollback so the conversation is readable
|
|
364
364
|
setCommittedResponses(rs => [...rs, {
|
|
365
365
|
key: `user-${Date.now()}`,
|
|
366
|
-
text: chalk.cyan('❯') +
|
|
366
|
+
text: chalk.bold.cyan('❯ ') + chalk.bold(trimmed),
|
|
367
367
|
tokens: { input: 0, output: 0, calls: 0 },
|
|
368
368
|
cost: 0,
|
|
369
369
|
}]);
|
|
@@ -398,7 +398,11 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
398
398
|
// Mouse support — clicks toggle tool results, drag selects text
|
|
399
399
|
useEffect(() => {
|
|
400
400
|
const cleanup = mouse.enable();
|
|
401
|
-
const handleClick = (
|
|
401
|
+
const handleClick = (event) => {
|
|
402
|
+
// Ignore clicks in the input area (bottom 4 rows of the terminal)
|
|
403
|
+
const termRows = process.stdout.rows ?? 24;
|
|
404
|
+
if (event.row >= termRows - 4)
|
|
405
|
+
return;
|
|
402
406
|
// Click: toggle expandable tool
|
|
403
407
|
setExpandableTool(prev => prev ? { ...prev, expanded: !prev.expanded } : null);
|
|
404
408
|
};
|
|
@@ -627,7 +631,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
627
631
|
: _jsx(Text, { color: "green", children: "\u2713" }), ' ', _jsx(Text, { bold: true, children: tool.name }), tool.preview ? _jsxs(Text, { dimColor: true, children: ["(", tool.preview.slice(0, 80), ")"] }) : null, _jsxs(Text, { dimColor: true, children: [" ", elapsedFmt] })] }), tool.diff && !tool.error && tool.diff.oldLines.length <= 8 && tool.diff.newLines.length <= 8 && (_jsxs(Box, { flexDirection: "column", marginLeft: 2, children: [tool.diff.oldLines.map((line, i) => (_jsxs(Text, { color: "red", wrap: "truncate-end", children: ['⎿ ', "- ", line.slice(0, 120)] }, `old-${i}`))), tool.diff.newLines.map((line, i) => (_jsxs(Text, { color: "green", wrap: "truncate-end", children: ['⎿ ', "+ ", line.slice(0, 120)] }, `new-${i}`)))] })), tool.diff && !tool.error && (tool.diff.oldLines.length > 8 || tool.diff.newLines.length > 8) && (_jsx(Box, { marginLeft: 2, children: _jsxs(Text, { dimColor: true, children: ['⎿ ', tool.diff.oldLines.length, " lines \u2192 ", tool.diff.newLines.length, " lines"] }) })), tool.error && tool.fullOutput && (_jsx(Box, { flexDirection: "column", marginLeft: 2, children: tool.fullOutput.split('\n').filter(Boolean).slice(0, 3).map((line, i) => (_jsxs(Text, { color: "red", wrap: "truncate-end", children: ['⎿ ', line.slice(0, 120)] }, i))) }))] }, tool.key));
|
|
628
632
|
} }), _jsx(Static, { items: committedResponses, children: (r) => {
|
|
629
633
|
const isUserMsg = r.key.startsWith('user-');
|
|
630
|
-
return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 1, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier && _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] }), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
|
|
634
|
+
return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), isUserMsg && (_jsx(Box, { marginTop: 1 })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 1, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier && _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] }), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
|
|
631
635
|
? `${r.tokens.calls} calls`
|
|
632
636
|
: `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : ''] }) }))] }, r.key));
|
|
633
637
|
} }), permissionRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 1, children: [_jsx(Text, { color: "yellow", children: " \u256D\u2500 Permission required \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "yellow", children: [" \u2502 ", _jsx(Text, { bold: true, children: permissionRequest.toolName })] }), permissionRequest.description.split('\n').map((line, i) => (_jsxs(Text, { dimColor: true, children: [" \u2502 ", line] }, i))), _jsx(Text, { color: "yellow", children: " \u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsx(Box, { marginLeft: 3, children: _jsxs(Text, { children: [_jsx(Text, { bold: true, color: "green", children: "[y]" }), _jsx(Text, { dimColor: true, children: " yes " }), _jsx(Text, { bold: true, color: "cyan", children: "[a]" }), _jsx(Text, { dimColor: true, children: " always " }), _jsx(Text, { bold: true, color: "red", children: "[n]" }), _jsx(Text, { dimColor: true, children: " no" })] }) })] })), askUserRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 1, children: [_jsx(Text, { color: "cyan", children: " \u256D\u2500 Question \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "cyan", children: [" \u2502 ", _jsx(Text, { bold: true, children: askUserRequest.question })] }), askUserRequest.options && askUserRequest.options.length > 0 && (askUserRequest.options.map((opt, i) => (_jsxs(Text, { dimColor: true, children: [" \u2502 ", i + 1, ". 
", opt] }, i)))), _jsx(Text, { color: "cyan", children: " \u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Box, { marginLeft: 3, children: [_jsx(Text, { bold: true, children: "answer> " }), _jsx(TextInput, { value: askUserInput, onChange: setAskUserInput, onSubmit: (val) => {
|
package/package.json
CHANGED