npm - @blockrun/franklin - Versions diffs - 3.6.5 → 3.6.7 - Mend

@blockrun/franklin 3.6.5 → 3.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/dist/agent/commands.js +134 -45
package/dist/agent/context.js +67 -3
package/dist/agent/loop.js +35 -4
package/dist/agent/types.d.ts +2 -0
package/dist/agent/verification.d.ts +42 -0
package/dist/agent/verification.js +206 -0
package/dist/commands/config.js +15 -7
package/dist/commands/setup.js +7 -7
package/dist/commands/start.js +4 -1
package/dist/learnings/extractor.d.ts +5 -0
package/dist/learnings/extractor.js +118 -2
package/dist/learnings/index.d.ts +3 -3
package/dist/learnings/index.js +2 -2
package/dist/learnings/store.d.ts +11 -1
package/dist/learnings/store.js +100 -0
package/dist/learnings/types.d.ts +16 -0
package/dist/tools/index.js +2 -0
package/dist/tools/moa.d.ts +16 -0
package/dist/tools/moa.js +173 -0
package/dist/ui/app.js +7 -3
package/package.json +1 -1

package/dist/agent/commands.js CHANGED Viewed

@@ -202,7 +202,7 @@ const DIRECT_COMMANDS = {
                 `  **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
                 `  **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
                 `  **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /session-search /context /tasks\n` +
-                `  **Power:** /ultrathink [query] /ultraplan /noplan /dump\n` +
+                `  **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
                 `  **Info:** /model /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
                 `  **UI:** /clear /exit\n` +
                 (ultrathinkOn ? `\n  Ultrathink: ON\n` : '')
@@ -290,7 +290,7 @@ const DIRECT_COMMANDS = {
         const hasWallet = fs.existsSync(path.join(BLOCKRUN_DIR, 'wallet.json'))
             || fs.existsSync(path.join(BLOCKRUN_DIR, 'solana-wallet.json'));
         checks.push(hasWallet ? '✓ wallet configured' : '⚠ no wallet — run: runcode setup');
-        checks.push(fs.existsSync(path.join(BLOCKRUN_DIR, 'runcode-config.json')) ? '✓ config file exists' : '⚠ no config — using defaults');
+        checks.push(fs.existsSync(path.join(BLOCKRUN_DIR, 'franklin-config.json')) || fs.existsSync(path.join(BLOCKRUN_DIR, 'runcode-config.json')) ? '✓ config file exists' : '⚠ no config — using defaults');
         // Check MCP
         const { listMcpServers } = await import('../mcp/client.js');
         const mcpServers = listMcpServers();
@@ -398,49 +398,6 @@ const DIRECT_COMMANDS = {
         ctx.onEvent({ kind: 'text_delta', text });
         emitDone(ctx);
     },
-    '/wallet': async (ctx) => {
-        const chain = (await import('../config.js')).loadChain();
-        try {
-            let address;
-            let balance;
-            const fetchTimeout = (ms) => new Promise((_, rej) => setTimeout(() => rej(new Error('timeout')), ms));
-            if (chain === 'solana') {
-                const { getOrCreateSolanaWallet, setupAgentSolanaWallet } = await import('@blockrun/llm');
-                const w = await getOrCreateSolanaWallet();
-                address = w.address;
-                try {
-                    const client = await setupAgentSolanaWallet({ silent: true });
-                    const bal = await Promise.race([client.getBalance(), fetchTimeout(5000)]);
-                    balance = `$${bal.toFixed(2)} USDC`;
-                }
-                catch {
-                    balance = '(unavailable)';
-                }
-            }
-            else {
-                const { getOrCreateWallet, setupAgentWallet } = await import('@blockrun/llm');
-                const w = getOrCreateWallet();
-                address = w.address;
-                try {
-                    const client = setupAgentWallet({ silent: true });
-                    const bal = await Promise.race([client.getBalance(), fetchTimeout(5000)]);
-                    balance = `$${bal.toFixed(2)} USDC`;
-                }
-                catch {
-                    balance = '(unavailable)';
-                }
-            }
-            ctx.onEvent({ kind: 'text_delta', text: `**Wallet**\n` +
-                    `  Chain:   ${chain}\n` +
-                    `  Address: ${address}\n` +
-                    `  Balance: ${balance}\n`
-            });
-        }
-        catch (err) {
-            ctx.onEvent({ kind: 'text_delta', text: `Wallet error: ${err.message}\n` });
-        }
-        emitDone(ctx);
-    },
     '/clear': (ctx) => {
         ctx.history.length = 0;
         resetTokenAnchor();
@@ -536,6 +493,8 @@ const ARG_COMMANDS = [
     { prefix: '/refactor ', rewrite: (a) => `Refactor: ${a}. Read the relevant code first, then make targeted changes. Explain each change.` },
     { prefix: '/scaffold ', rewrite: (a) => `Create the scaffolding/boilerplate for: ${a}. Generate the file structure and initial code. Ask me if you need clarification on requirements.` },
     { prefix: '/doc ', rewrite: (a) => `Generate documentation for ${a}. Include: purpose, API/interface description, usage examples, and important notes.` },
+    { prefix: '/moa ', rewrite: (a) => `Use the MixtureOfAgents tool to get a high-quality answer by querying multiple AI models in parallel: ${a}` },
+    { prefix: '/moa', rewrite: () => `Use the MixtureOfAgents tool. Ask me what question I want answered by multiple models.` },
 ];
 // ─── Main dispatch ────────────────────────────────────────────────────────
 /**
@@ -667,6 +626,7 @@ export async function handleSlashCommand(input, ctx) {
         else {
             const newModel = resolveModel(input.slice(7).trim());
             ctx.config.model = newModel;
+            ctx.config.baseModel = newModel; // Update recovery target so loop doesn't reset
             ctx.config.onModelChange?.(newModel);
             ctx.onEvent({ kind: 'text_delta', text: `Model → **${newModel}**\n` });
         }
@@ -690,6 +650,135 @@ export async function handleSlashCommand(input, ctx) {
         emitDone(ctx);
         return { handled: true };
     }
+    // /wallet — show wallet info, import, or export
+    if (input === '/wallet' || input.startsWith('/wallet ')) {
+        const chain = (await import('../config.js')).loadChain();
+        const args = input.slice(7).trim();
+        // /wallet export — show private key
+        if (args === 'export') {
+            try {
+                if (chain === 'solana') {
+                    const { loadSolanaWallet, getOrCreateSolanaWallet } = await import('@blockrun/llm');
+                    const key = loadSolanaWallet();
+                    if (!key) {
+                        ctx.onEvent({ kind: 'text_delta', text: 'No Solana wallet found. Run `/wallet` first.\n' });
+                        emitDone(ctx);
+                        return { handled: true };
+                    }
+                    const w = await getOrCreateSolanaWallet();
+                    ctx.onEvent({ kind: 'text_delta', text: `**Wallet Export (Solana)**\n` +
+                            `  Address:     ${w.address}\n` +
+                            `  Private Key: ${key}\n\n` +
+                            `⚠️  Keep this key safe. Anyone with it controls your funds.\n`
+                    });
+                }
+                else {
+                    const { loadWallet, getOrCreateWallet } = await import('@blockrun/llm');
+                    const key = loadWallet();
+                    if (!key) {
+                        ctx.onEvent({ kind: 'text_delta', text: 'No wallet found. Run `/wallet` first.\n' });
+                        emitDone(ctx);
+                        return { handled: true };
+                    }
+                    const w = getOrCreateWallet();
+                    ctx.onEvent({ kind: 'text_delta', text: `**Wallet Export (Base)**\n` +
+                            `  Address:     ${w.address}\n` +
+                            `  Private Key: ${key}\n\n` +
+                            `⚠️  Keep this key safe. Anyone with it controls your funds.\n`
+                    });
+                }
+            }
+            catch (err) {
+                ctx.onEvent({ kind: 'text_delta', text: `Export error: ${err.message}\n` });
+            }
+            emitDone(ctx);
+            return { handled: true };
+        }
+        // /wallet import <private-key>
+        if (args.startsWith('import')) {
+            const key = args.slice(6).trim();
+            if (!key) {
+                ctx.onEvent({ kind: 'text_delta', text: `**Usage:** \`/wallet import <private-key>\`\n\n` +
+                        `  Base:   \`/wallet import 0x...\`  (hex, 66 chars)\n` +
+                        `  Solana: \`/wallet import <bs58-key>\`  (base58 encoded)\n`
+                });
+                emitDone(ctx);
+                return { handled: true };
+            }
+            try {
+                if (chain === 'solana') {
+                    const { saveSolanaWallet, solanaPublicKey } = await import('@blockrun/llm');
+                    const address = await solanaPublicKey(key);
+                    saveSolanaWallet(key);
+                    ctx.onEvent({ kind: 'text_delta', text: `**Wallet Imported (Solana)**\n` +
+                            `  Address: ${address}\n` +
+                            `  Saved to: ~/.blockrun/\n\n` +
+                            `Restart Franklin to use the new wallet.\n`
+                    });
+                }
+                else {
+                    const { privateKeyToAccount } = await import('viem/accounts');
+                    const { saveWallet } = await import('@blockrun/llm');
+                    const account = privateKeyToAccount(key);
+                    saveWallet(key);
+                    ctx.onEvent({ kind: 'text_delta', text: `**Wallet Imported (Base)**\n` +
+                            `  Address: ${account.address}\n` +
+                            `  Saved to: ~/.blockrun/\n\n` +
+                            `Restart Franklin to use the new wallet.\n`
+                    });
+                }
+            }
+            catch (err) {
+                ctx.onEvent({ kind: 'text_delta', text: `Import error: ${err.message}\n` });
+            }
+            emitDone(ctx);
+            return { handled: true };
+        }
+        // /wallet (no args) — show wallet info
+        try {
+            let address;
+            let balance;
+            const fetchTimeout = (ms) => new Promise((_, rej) => setTimeout(() => rej(new Error('timeout')), ms));
+            if (chain === 'solana') {
+                const { getOrCreateSolanaWallet, setupAgentSolanaWallet } = await import('@blockrun/llm');
+                const w = await getOrCreateSolanaWallet();
+                address = w.address;
+                try {
+                    const client = await setupAgentSolanaWallet({ silent: true });
+                    const bal = await Promise.race([client.getBalance(), fetchTimeout(5000)]);
+                    balance = `$${bal.toFixed(2)} USDC`;
+                }
+                catch {
+                    balance = '(unavailable)';
+                }
+            }
+            else {
+                const { getOrCreateWallet, setupAgentWallet } = await import('@blockrun/llm');
+                const w = getOrCreateWallet();
+                address = w.address;
+                try {
+                    const client = setupAgentWallet({ silent: true });
+                    const bal = await Promise.race([client.getBalance(), fetchTimeout(5000)]);
+                    balance = `$${bal.toFixed(2)} USDC`;
+                }
+                catch {
+                    balance = '(unavailable)';
+                }
+            }
+            ctx.onEvent({ kind: 'text_delta', text: `**Wallet**\n` +
+                    `  Chain:   ${chain}\n` +
+                    `  Address: ${address}\n` +
+                    `  Balance: ${balance}\n\n` +
+                    `  \`/wallet import <key>\`  — import a personal wallet\n` +
+                    `  \`/wallet export\`        — show private key\n`
+            });
+        }
+        catch (err) {
+            ctx.onEvent({ kind: 'text_delta', text: `Wallet error: ${err.message}\n` });
+        }
+        emitDone(ctx);
+        return { handled: true };
+    }
     // /delete <...>
     if (input.startsWith('/delete ')) {
         const arg = input.slice('/delete '.length).trim();

package/dist/agent/context.js CHANGED Viewed

@@ -5,7 +5,7 @@
 import fs from 'node:fs';
 import path from 'node:path';
 import { execSync } from 'node:child_process';
-import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt } from '../learnings/store.js';
+import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt, loadSkills, formatSkillsForPrompt } from '../learnings/store.js';
 // ─── System Instructions Assembly ──────────────────────────────────────────
 // Composable prompt sections — each independently maintainable and conditionally includable.
 function getCoreInstructions() {
@@ -186,10 +186,16 @@ export function assembleInstructions(workingDir, model) {
         getTokenEfficiencySection(),
         getVerificationSection(),
     ];
-    // Read RUNCODE.md or CLAUDE.md from the project
+    // Read RUNCODE.md or CLAUDE.md from the project (with injection scanning)
     const projectConfig = readProjectConfig(workingDir);
     if (projectConfig) {
-        parts.push(`# Project Instructions\n\n${projectConfig}`);
+        const { sanitized, threats } = scanForInjection(projectConfig);
+        if (threats.length > 0) {
+            parts.push(`# Project Instructions\n\n⚠️ WARNING: ${threats.length} suspicious pattern(s) detected in project config and neutralized.\n\n${sanitized}`);
+        }
+        else {
+            parts.push(`# Project Instructions\n\n${projectConfig}`);
+        }
     }
     // Inject environment info
     parts.push(buildEnvironmentSection(workingDir));
@@ -210,6 +216,18 @@ export function assembleInstructions(workingDir, model) {
         }
     }
     catch { /* learnings are optional — never block startup */ }
+    // Inject relevant skills (procedural memory from past complex tasks)
+    try {
+        const allSkills = loadSkills();
+        if (allSkills.length > 0) {
+            // Skills are matched lazily on first user message — for now inject top skills by use count
+            const topSkills = allSkills.sort((a, b) => b.uses - a.uses).slice(0, 5);
+            const skillsSection = formatSkillsForPrompt(topSkills);
+            if (skillsSection)
+                parts.push(skillsSection);
+        }
+    }
+    catch { /* skills are optional */ }
     // Model-specific execution guidance
     if (model) {
         parts.push(getModelGuidance(model));
@@ -276,6 +294,52 @@ export function invalidateInstructionCache(workingDir) {
         _instructionCache.clear();
     }
 }
+// ─── Prompt Injection Detection ────────────────────────────────────────────
+/** Patterns that indicate potential prompt injection in context files. */
+const INJECTION_PATTERNS = [
+    // Direct instruction override attempts
+    { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, description: 'instruction override' },
+    { pattern: /disregard\s+(all\s+)?(previous\s+|above\s+)?rules/i, description: 'rule disregard' },
+    { pattern: /forget\s+(everything|all|your)\s+(you|instructions|rules)/i, description: 'memory wipe' },
+    { pattern: /you\s+are\s+now\s+(?:a\s+)?(?:different|new|unrestricted)/i, description: 'identity hijack' },
+    { pattern: /system\s*:\s*you\s+are/i, description: 'fake system message' },
+    // Dangerous command injection
+    { pattern: /execute\s+(curl|wget|bash|sh|python|node)\b/i, description: 'command execution' },
+    { pattern: /\bcat\s+\/etc\/(passwd|shadow|sudoers)/i, description: 'credential access' },
+    { pattern: /\brm\s+-rf\s+[\/~]/i, description: 'destructive command' },
+    { pattern: /\beval\s*\(/i, description: 'eval injection' },
+    // Data exfiltration
+    { pattern: /\bcurl\s+.*\|\s*(bash|sh)/i, description: 'pipe to shell' },
+    { pattern: /send\s+(to|via)\s+(http|webhook|url)/i, description: 'data exfiltration' },
+    // HTML/comment injection
+    { pattern: /<!--[\s\S]*?-->/g, description: 'HTML comment injection' },
+];
+/** Invisible unicode characters that can hide malicious content. */
+const INVISIBLE_UNICODE = /[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF\u00AD]/g;
+/**
+ * Scan text for prompt injection patterns and invisible unicode.
+ * Returns sanitized text with threats neutralized and a list of detections.
+ */
+function scanForInjection(text) {
+    const threats = [];
+    let sanitized = text;
+    // Check for invisible unicode
+    if (INVISIBLE_UNICODE.test(sanitized)) {
+        const count = (sanitized.match(INVISIBLE_UNICODE) || []).length;
+        threats.push(`${count} invisible unicode character(s) removed`);
+        sanitized = sanitized.replace(INVISIBLE_UNICODE, '');
+    }
+    // Check for injection patterns
+    for (const { pattern, description } of INJECTION_PATTERNS) {
+        const matches = sanitized.match(pattern);
+        if (matches) {
+            threats.push(`${description}: "${matches[0].slice(0, 50)}"`);
+            // Neutralize by wrapping in brackets (visible but defanged)
+            sanitized = sanitized.replace(pattern, (match) => `[BLOCKED: ${match}]`);
+        }
+    }
+    return { sanitized, threats };
+}
 // ─── Project Config ────────────────────────────────────────────────────────
 /**
  * Look for RUNCODE.md, then CLAUDE.md in the working directory and parents.

package/dist/agent/loop.js CHANGED Viewed

@@ -19,6 +19,7 @@ import { maybeMidSessionExtract } from '../learnings/extractor.js';
 import { routeRequest, parseRoutingProfile } from '../router/index.js';
 import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
+import { shouldVerify, runVerification } from './verification.js';
 import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, } from '../session/storage.js';
 /**
  * Atomically replace all elements in a history array.
@@ -218,7 +219,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     const permissions = new PermissionManager(config.permissionMode ?? 'default', config.permissionPromptFn);
     const history = [];
     let lastUserInput = ''; // For /retry
-    const originalModel = config.model; // Preserve original model/routing profile for recovery
+    config.baseModel = config.model; // User's intended model — /model command updates this
     let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
     // Track models that failed with 402 (payment required) across turns.
     // These persist until the session ends — unlike transient errors, payment failures
@@ -294,9 +295,10 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         // ── Model recovery: try original model at the start of each new turn ──
         // If we fell back to a free model last turn due to a transient error, try original again.
         // But DON'T reset if the original model had a payment failure — it will just fail again.
-        if (config.model !== originalModel && !paymentFailedModels.has(originalModel)) {
-            config.model = originalModel;
-            config.onModelChange?.(originalModel);
+        const baseModel = config.baseModel ?? config.model;
+        if (config.model !== baseModel && !paymentFailedModels.has(baseModel)) {
+            config.model = baseModel;
+            config.onModelChange?.(baseModel);
         }
         turnFailedModels = new Set(); // Fresh slate for transient failures this turn
         const abort = new AbortController();
@@ -714,6 +716,35 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                         });
                     }
                 }
+                // ── Verification gate: run adversarial checks on substantial work ──
+                if (shouldVerify(turnToolCalls, turnToolCounts, lastUserInput || '')) {
+                    try {
+                        const vResult = await runVerification(history, capabilityMap, client, {
+                            model: config.model,
+                            workDir,
+                            abortSignal: abort.signal,
+                            onEvent: (e) => { if (e.kind === 'text_delta' && e.text)
+                                onEvent({ kind: 'text_delta', text: e.text }); },
+                        });
+                        if (vResult.verdict === 'FAIL' && vResult.issues.length > 0) {
+                            // Inject verification feedback — agent will see this and continue fixing
+                            const feedbackMsg = {
+                                role: 'user',
+                                content: `[VERIFICATION FAILED]\n${vResult.summary}\n\nFix the issues above and verify your fixes work.`,
+                            };
+                            history.push(feedbackMsg);
+                            persistSessionMessage(feedbackMsg);
+                            onEvent({ kind: 'text_delta', text: `\n⚠️ *Verification found issues — fixing...*\n` });
+                            continue; // Re-enter the loop to fix issues
+                        }
+                        if (vResult.verdict === 'PASS') {
+                            onEvent({ kind: 'text_delta', text: '\n✓ *Verified*\n' });
+                        }
+                    }
+                    catch {
+                        // Verification errors never block the main flow
+                    }
+                }
                 // Record success for local Elo learning (include tool call count for efficiency)
                 if (lastRoutedCategory && lastRoutedModel) {
                     recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);

package/dist/agent/types.d.ts CHANGED Viewed

@@ -142,4 +142,6 @@ export interface AgentConfig {
     onAskUser?: (question: string, options?: string[]) => Promise<string>;
     /** Notify UI when agent switches model (e.g. payment fallback) */
     onModelChange?: (model: string) => void;
+    /** The user's intended model — updated by /model command, used for turn recovery */
+    baseModel?: string;
 }

package/dist/agent/verification.d.ts ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * Verification Agent — adversarial testing gate.
+ *
+ * After the main agent completes substantial work (writes/edits files, runs commands),
+ * this agent runs independently to try to BREAK what was built. It can only read and
+ * execute — never modify files. Returns PASS/FAIL/PARTIAL verdict.
+ *
+ * If FAIL: injects feedback into conversation so the main agent can fix issues.
+ * If PASS: work is considered verified.
+ *
+ * Inspired by Claude Code's verification agent architecture.
+ */
+import type { CapabilityHandler, Dialogue } from './types.js';
+import { ModelClient } from './llm.js';
+export interface VerificationResult {
+    verdict: 'PASS' | 'FAIL' | 'PARTIAL' | 'SKIPPED';
+    summary: string;
+    issues: string[];
+}
+/**
+ * Should we run verification for this turn?
+ * Only for substantial work: 3+ tool calls AND at least one write/edit/bash.
+ */
+export declare function shouldVerify(turnToolCalls: number, turnToolCounts: Map<string, number>, userInput: string): boolean;
+/**
+ * Filter capability handlers to only allow read-only tools.
+ * Bash is allowed (for running tests/builds) but Edit/Write are blocked.
+ */
+export declare function getVerificationTools(handlers: Map<string, CapabilityHandler>): Map<string, CapabilityHandler>;
+/**
+ * Run the verification agent on the current conversation state.
+ * Uses a cheap model to minimize cost. Returns verdict + issues.
+ */
+export declare function runVerification(history: Dialogue[], handlers: Map<string, CapabilityHandler>, client: ModelClient, config: {
+    model: string;
+    workDir: string;
+    abortSignal: AbortSignal;
+    onEvent?: (event: {
+        kind: string;
+        text?: string;
+    }) => void;
+}): Promise<VerificationResult>;

package/dist/agent/verification.js ADDED Viewed

@@ -0,0 +1,206 @@
+/**
+ * Verification Agent — adversarial testing gate.
+ *
+ * After the main agent completes substantial work (writes/edits files, runs commands),
+ * this agent runs independently to try to BREAK what was built. It can only read and
+ * execute — never modify files. Returns PASS/FAIL/PARTIAL verdict.
+ *
+ * If FAIL: injects feedback into conversation so the main agent can fix issues.
+ * If PASS: work is considered verified.
+ *
+ * Inspired by Claude Code's verification agent architecture.
+ */
+// ─── Verification System Prompt ───────────────────────────────────────────
+const VERIFICATION_PROMPT = `You are a VERIFICATION agent. Your job is NOT to confirm that code works — it is to TRY TO BREAK IT.
+## Rules
+1. **Adversarial mindset**: Assume the code has bugs. Your goal is to find them.
+2. **No modifications**: You may ONLY use Read, Bash, Glob, and Grep tools. You MUST NOT use Edit, Write, or any tool that modifies files.
+3. **Evidence required**: Every check MUST include:
+   - What you tested (the exact command or operation)
+   - The actual output
+   - Whether it PASSED or FAILED
+4. **No rationalization**: These phrases are NEVER acceptable as evidence:
+   - "The code looks correct"
+   - "This should work"
+   - "Based on the implementation, it handles..."
+   - "The tests pass" (unless you actually ran them and showed output)
+## What to Check
+1. **Does it compile/build?** Run the build command.
+2. **Do tests pass?** Run the test suite.
+3. **Edge cases**: Empty inputs, very large inputs, missing files, invalid data.
+4. **Error handling**: What happens when things go wrong?
+5. **Consistency**: Does the change break other parts of the codebase?
+## Output Format
+After running your checks, output a verdict in EXACTLY this format:
+VERDICT: PASS|FAIL|PARTIAL
+Then explain:
+- What you tested
+- What passed
+- What failed (if any)
+- Specific issues to fix (if FAIL)
+Keep it concise — focus on actionable findings, not narration.`;
+// ─── Thresholds ──────────────────────────────────────────────────────────
+/** Only verify turns where substantial work was done. */
+const WRITE_TOOLS = new Set(['Edit', 'Write', 'Bash']);
+/** Minimum tool calls to trigger verification. */
+const MIN_TOOL_CALLS = 3;
+/** Maximum tokens to spend on verification (prevent runaway). */
+const MAX_VERIFICATION_TOKENS = 8192;
+// ─── Decision Logic ──────────────────────────────────────────────────────
+/**
+ * Should we run verification for this turn?
+ * Only for substantial work: 3+ tool calls AND at least one write/edit/bash.
+ */
+export function shouldVerify(turnToolCalls, turnToolCounts, userInput) {
+    // Skip if not enough tool calls
+    if (turnToolCalls < MIN_TOOL_CALLS)
+        return false;
+    // Skip if no write-like tools were used
+    let hasWriteTool = false;
+    for (const [name] of turnToolCounts) {
+        if (WRITE_TOOLS.has(name)) {
+            hasWriteTool = true;
+            break;
+        }
+    }
+    if (!hasWriteTool)
+        return false;
+    // Skip if user explicitly asked for something quick
+    const lower = userInput.toLowerCase();
+    if (lower.startsWith('/') || lower.length < 20)
+        return false;
+    return true;
+}
+// ─── Read-only tool filter ───────────────────────────────────────────────
+const READ_ONLY_TOOLS = new Set(['Read', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch']);
+/**
+ * Filter capability handlers to only allow read-only tools.
+ * Bash is allowed (for running tests/builds) but Edit/Write are blocked.
+ */
+export function getVerificationTools(handlers) {
+    const filtered = new Map();
+    for (const [name, handler] of handlers) {
+        if (READ_ONLY_TOOLS.has(name)) {
+            filtered.set(name, handler);
+        }
+    }
+    return filtered;
+}
+// ─── Run Verification ────────────────────────────────────────────────────
+/**
+ * Run the verification agent on the current conversation state.
+ * Uses a cheap model to minimize cost. Returns verdict + issues.
+ */
+export async function runVerification(history, handlers, client, config) {
+    const verificationTools = getVerificationTools(handlers);
+    // Build verification prompt from recent history context
+    const recentWork = extractRecentWork(history);
+    if (!recentWork) {
+        return { verdict: 'SKIPPED', summary: 'No recent work to verify.', issues: [] };
+    }
+    const verificationHistory = [
+        {
+            role: 'user',
+            content: `The following work was just completed. Your job is to VERIFY it by running adversarial checks.\n\n${recentWork}\n\nRun build, tests, and edge case checks. Output your VERDICT.`,
+        },
+    ];
+    config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
+    // Use cheap model for verification
+    const verificationModel = 'nvidia/nemotron-ultra-253b'; // Free model to keep cost zero
+    try {
+        // Simple single-turn verification call
+        const response = await client.complete({
+            model: verificationModel,
+            system: VERIFICATION_PROMPT,
+            messages: verificationHistory,
+            tools: Array.from(verificationTools.values()).map(h => h.spec),
+            max_tokens: MAX_VERIFICATION_TOKENS,
+        });
+        // Extract text from response
+        let responseText = '';
+        if (response.content) {
+            for (const part of response.content) {
+                if (typeof part === 'string') {
+                    responseText += part;
+                }
+                else if (part.type === 'text') {
+                    responseText += part.text;
+                }
+            }
+        }
+        // Parse verdict
+        const verdictMatch = responseText.match(/VERDICT:\s*(PASS|FAIL|PARTIAL)/i);
+        const verdict = verdictMatch
+            ? verdictMatch[1].toUpperCase()
+            : 'PARTIAL';
+        // Extract issues
+        const issues = [];
+        const issueLines = responseText.split('\n').filter(l => l.match(/^[-•*]\s*(FAIL|ERROR|BUG|ISSUE|PROBLEM)/i) ||
+            l.match(/^[-•*]\s+.*fail/i));
+        for (const line of issueLines) {
+            issues.push(line.replace(/^[-•*]\s*/, '').trim());
+        }
+        return { verdict, summary: responseText.slice(0, 500), issues };
+    }
+    catch (err) {
+        // Verification failure should never block the main flow
+        return {
+            verdict: 'SKIPPED',
+            summary: `Verification error: ${err.message}`,
+            issues: [],
+        };
+    }
+}
+/**
+ * Extract a summary of recent work from the conversation history.
+ * Looks at the last assistant turn and its tool calls.
+ */
+function extractRecentWork(history) {
+    const parts = [];
+    // Walk backwards through history to find recent tool uses and assistant messages
+    let found = 0;
+    for (let i = history.length - 1; i >= 0 && found < 10; i--) {
+        const msg = history[i];
+        const role = msg.role;
+        // Stop at a pure user message boundary (not a tool_result user message)
+        if (role === 'user' && !Array.isArray(msg.content))
+            break;
+        if (role === 'assistant' && Array.isArray(msg.content)) {
+            for (const part of msg.content) {
+                if (typeof part === 'object') {
+                    if (part.type === 'text' && part.text) {
+                        parts.unshift(`Assistant: ${part.text.slice(0, 500)}`);
+                        found++;
+                    }
+                    else if (part.type === 'tool_use') {
+                        parts.unshift(`Tool: ${part.name}(${JSON.stringify(part.input).slice(0, 200)})`);
+                        found++;
+                    }
+                }
+            }
+        }
+        else if (role === 'user' && Array.isArray(msg.content)) {
+            for (const part of msg.content) {
+                if (typeof part === 'object' && part.type === 'tool_result') {
+                    const output = typeof part.content === 'string'
+                        ? part.content
+                        : Array.isArray(part.content)
+                            ? part.content.map(c => c.text || '').join('\n')
+                            : '';
+                    parts.unshift(`Result: ${output.slice(0, 300)}`);
+                    found++;
+                }
+            }
+        }
+    }
+    return parts.length > 0 ? parts.join('\n\n') : null;
+}