npm - @blockrun/franklin - Versions diffs - 3.15.3 → 3.15.4 - Mend

@blockrun/franklin 3.15.3 → 3.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/agent/evaluator.js CHANGED

@@ -289,13 +289,23 @@ function anySignal(signals) {
 export function renderGroundingFollowup(result) {
     if (result.verdict === 'GROUNDED' || result.verdict === 'SKIPPED')
         return '';
+    // Headers state the situation directly. Old phrasing told the user to "re-run
+    // with the suggested tools" which both put the burden on them and exposed
+    // FRANKLIN_NO_EVAL as a one-flag escape hatch from the quality gate. New
+    // phrasing names the gap and offers a concrete next action.
     const header = result.verdict === 'UNGROUNDED'
-        ? '⚠️ **Grounding check failed** — the previous answer relied on memory where a tool call was available:'
-        : '⚠️ **Grounding check flagged some claims** — re-run with the suggested tools for a verified answer:';
+        ? '⚠️ **Unverified answer** — the model produced specific claims without calling any tool to back them up:'
+        : '⚠️ **Partial verification** — some claims in the answer aren\'t backed by tool output:';
     const body = result.issues.length > 0
         ? result.issues.map(i => `- ${i}`).join('\n')
-        : '(evaluator returned no specific items — check the transcript manually)';
-    return `\n\n${header}\n${body}\n\n_Ask again with an explicit instruction to call the tools, or disable these checks with \`FRANKLIN_NO_EVAL=1\`._`;
+        : '_(evaluator returned no specific items — check the transcript manually)_';
+    // Action line: tell the user exactly how to follow up, in their own voice.
+    // No env-var escape hatch in the user-facing text — that's a config concern,
+    // not a "make this warning go away" concern.
+    const action = result.verdict === 'UNGROUNDED'
+        ? '\n\n_Reply "verify" to re-run with required tool use, or accept the answer as-is._'
+        : '\n\n_Reply "verify" to fact-check the flagged claims, or accept the answer as-is._';
+    return `\n\n${header}\n${body}${action}`;
 }
 /**
  * Build a synthetic user message that instructs the agent to retry with the

package/dist/router/index.js CHANGED

@@ -110,8 +110,26 @@ const REASONING_KEYWORDS = [
     'formally', 'mathematical', 'proof', 'logically', '证明', '定理', '推导',
 ];
 const SIMPLE_KEYWORDS = [
-    'what is', 'define', 'translate', 'hello', 'yes or no', 'capital of',
-    'how old', 'who is', 'when was', '什么是', '翻译', '你好',
+    // True simple intents: greeting, definition lookup, translation. Factual
+    // lookups ("who is", "when was", "capital of") were moved to RESEARCH below
+    // because they look easy but require external recall — sending them to
+    // SIMPLE-tier models reliably produces hallucinated subscriber counts,
+    // birth years, etc. that the post-hoc grounding check then has to flag.
+    'define', 'translate', 'hello', 'yes or no', '翻译', '你好',
+];
+// Research / fact-retrieval intent: questions whose correct answer depends
+// on data the model can't reliably recall from weights — current statistics,
+// latest news, comparisons, "best" rankings, identities of people/orgs.
+// Bumping tier here pushes them to a MEDIUM/COMPLEX model that has
+// WebSearch in its toolset, instead of letting a cheap text-only model
+// fabricate plausible-looking numbers.
+const RESEARCH_KEYWORDS = [
+    'who is', 'who was', 'when was', 'when did', 'what is the capital',
+    'how old', 'how many', 'how much',
+    'best', 'top ', 'most popular', 'compare', 'vs ', ' vs.',
+    'latest', 'current', 'recent', 'today', 'now',
+    'subscribers', 'members', 'followers', 'market cap', 'price of',
+    '最好的', '最新', '最近', '现在', '当前', '排名', '对比',
 ];
 const TECHNICAL_KEYWORDS = [
     'algorithm', 'optimize', 'architecture', 'distributed', 'kubernetes',
@@ -180,6 +198,17 @@ function classifyRequest(prompt, tokenCount) {
         score -= 0.25;
         signals.push('simple');
     }
+    // Research / fact-lookup detection (weight: +0.30). Bumps tier upward so
+    // questions like "best subreddit", "current price of X", "how many members"
+    // route to a model that can actually call WebSearch instead of guessing
+    // from weights. Capped at one keyword's worth — research questions
+    // typically signal with one phrase, and stacking would push trivial
+    // questions into REASONING.
+    const researchMatches = countMatches(prompt, RESEARCH_KEYWORDS);
+    if (researchMatches >= 1) {
+        score += 0.30;
+        signals.push('research');
+    }
     // Technical complexity (weight: 0.15) - increased
     const techMatches = countMatches(prompt, TECHNICAL_KEYWORDS);
     if (techMatches >= 2) {

package/dist/ui/app.js CHANGED

@@ -204,6 +204,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
     const turnModelRef = useRef(undefined);
     const turnTierRef = useRef(undefined);
     const turnSavingsRef = useRef(undefined);
+    const turnCtxPctRef = useRef(undefined);
     const queuedInputsRef = useRef([]);
     // Keep refs in sync so memoized event handlers can read current values
     streamTextRef.current = streamText;
@@ -241,6 +242,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
                     model: turnModelRef.current,
                     tier: turnTierRef.current,
                     savings: turnSavingsRef.current,
+                    ctxPct: turnCtxPctRef.current,
                     thinkMs,
                     thinkChars,
                 }];
@@ -423,6 +425,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
                     turnModelRef.current = undefined;
                     turnTierRef.current = undefined;
                     turnSavingsRef.current = undefined;
+                    turnCtxPctRef.current = undefined;
                     setWaiting(true);
                     setReady(false);
                     // Pass through to agent loop to clear the actual conversation history
@@ -444,6 +447,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
                     turnModelRef.current = undefined;
                     turnTierRef.current = undefined;
                     turnSavingsRef.current = undefined;
+                    turnCtxPctRef.current = undefined;
                     onSubmit(lastPrompt);
                     return;
                 default:
@@ -494,6 +498,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
         turnModelRef.current = undefined;
         turnTierRef.current = undefined;
         turnSavingsRef.current = undefined;
+        turnCtxPctRef.current = undefined;
         onSubmit(trimmed);
     }, [ready, currentModel, totalCost, onSubmit, onModelChange, onAbort, onExit, exit, lastPrompt, inputHistory, showStatus]);
     // Mouse support — OFF by default because Node stdin is shared: mouse escape
@@ -658,8 +663,10 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
                             turnTierRef.current = event.tier;
                         if (event.savings !== undefined)
                             turnSavingsRef.current = event.savings;
-                        if (event.contextPct !== undefined)
+                        if (event.contextPct !== undefined) {
                             setContextPct(event.contextPct);
+                            turnCtxPctRef.current = event.contextPct;
+                        }
                         break;
                     }
                     case 'turn_done': {
@@ -757,9 +764,13 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
                     const isUserMsg = r.key.startsWith('user-');
                     return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), isUserMsg && (_jsx(Box, { marginTop: 1 })), !isUserMsg && r.thinkMs !== undefined && r.thinkMs >= 500 && (_jsx(Box, { paddingLeft: 2, children: _jsxs(Text, { color: "magenta", dimColor: true, children: ["\u273B Thought for ", (r.thinkMs / 1000).toFixed(1), "s", r.thinkChars && r.thinkChars > 20
                                             ? ` · ~${Math.round(r.thinkChars / 4)} tokens`
-                                            : ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier && _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] }), r.model ? shortModelName(r.model) : '', r.model ? '  ·  ' : '', r.tokens.calls > 0 && r.tokens.input === 0
+                                            : ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier
+                                            ? _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] })
+                                            : (r.model ? _jsx(Text, { dimColor: true, children: "[direct] " }) : null), r.model ? shortModelName(r.model) : '', r.model ? '  ·  ' : '', r.tokens.calls > 0 && r.tokens.input === 0
                                             ? `${r.tokens.calls} calls`
-                                            : `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? `  ·  $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: ["  saved ", Math.round(r.savings * 100), "%"] }) : ''] }) }))] }, r.key));
+                                            : `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? `  ·  $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: ["  saved ", Math.round(r.savings * 100), "%"] }) : '', r.ctxPct !== undefined && r.ctxPct >= 5
+                                            ? _jsxs(Text, { color: r.ctxPct >= 80 ? 'red' : r.ctxPct >= 50 ? 'yellow' : undefined, dimColor: r.ctxPct < 50, children: ["  \u00B7  ctx ", r.ctxPct, "%"] })
+                                            : ''] }) }))] }, r.key));
                 } }), permissionRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "yellow", children: "\u256D\u2500 Permission required \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "yellow", children: ["\u2502 ", _jsx(Text, { bold: true, children: permissionRequest.toolName })] }), permissionRequest.description.split('\n').map((line, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", line] }, i))), _jsx(Text, { color: "yellow", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsx(Box, { marginLeft: 2, children: _jsxs(Text, { children: [_jsx(Text, { bold: true, color: "green", children: "[y]" }), _jsx(Text, { dimColor: true, children: " yes  " }), _jsx(Text, { bold: true, color: "cyan", children: "[a]" }), _jsx(Text, { dimColor: true, children: " always  " }), _jsx(Text, { bold: true, color: "red", children: "[n]" }), _jsx(Text, { dimColor: true, children: " no" })] }) })] })), askUserRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "cyan", children: "\u256D\u2500 Question \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "cyan", children: ["\u2502 ", _jsx(Text, { bold: true, children: askUserRequest.question })] }), askUserRequest.options && askUserRequest.options.length > 0 && (askUserRequest.options.map((opt, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", i + 1, ". ", opt] }, i)))), _jsx(Text, { color: "cyan", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Box, { marginLeft: 2, children: [_jsx(Text, { bold: true, children: "answer> " }), _jsx(TextInput, { value: askUserInput, onChange: setAskUserInput, onSubmit: (val) => {
                                     const answer = val.trim() || '(no response)';
                                     const r = askUserRequest.resolve;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.3",
+  "version": "3.15.4",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {