@blockrun/franklin 3.15.3 → 3.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -289,13 +289,23 @@ function anySignal(signals) {
289
289
  export function renderGroundingFollowup(result) {
290
290
  if (result.verdict === 'GROUNDED' || result.verdict === 'SKIPPED')
291
291
  return '';
292
+ // Headers state the situation directly. Old phrasing told the user to "re-run
293
+ // with the suggested tools" which both put the burden on them and exposed
294
+ // FRANKLIN_NO_EVAL as a one-flag escape hatch from the quality gate. New
295
+ // phrasing names the gap and offers a concrete next action.
292
296
  const header = result.verdict === 'UNGROUNDED'
293
- ? '⚠️ **Grounding check failed** — the previous answer relied on memory where a tool call was available:'
294
- : '⚠️ **Grounding check flagged some claims** re-run with the suggested tools for a verified answer:';
297
+ ? '⚠️ **Unverified answer** — the model produced specific claims without calling any tool to back them up:'
298
+ : '⚠️ **Partial verification** some claims in the answer aren\'t backed by tool output:';
295
299
  const body = result.issues.length > 0
296
300
  ? result.issues.map(i => `- ${i}`).join('\n')
297
- : '(evaluator returned no specific items — check the transcript manually)';
298
- return `\n\n${header}\n${body}\n\n_Ask again with an explicit instruction to call the tools, or disable these checks with \`FRANKLIN_NO_EVAL=1\`._`;
301
+ : '_(evaluator returned no specific items — check the transcript manually)_';
302
+ // Action line: tell the user exactly how to follow up, in their own voice.
303
+ // No env-var escape hatch in the user-facing text — that's a config concern,
304
+ // not a "make this warning go away" concern.
305
+ const action = result.verdict === 'UNGROUNDED'
306
+ ? '\n\n_Reply "verify" to re-run with required tool use, or accept the answer as-is._'
307
+ : '\n\n_Reply "verify" to fact-check the flagged claims, or accept the answer as-is._';
308
+ return `\n\n${header}\n${body}${action}`;
299
309
  }
300
310
  /**
301
311
  * Build a synthetic user message that instructs the agent to retry with the
@@ -110,8 +110,26 @@ const REASONING_KEYWORDS = [
110
110
  'formally', 'mathematical', 'proof', 'logically', '证明', '定理', '推导',
111
111
  ];
112
112
  const SIMPLE_KEYWORDS = [
113
- 'what is', 'define', 'translate', 'hello', 'yes or no', 'capital of',
114
- 'how old', 'who is', 'when was', '什么是', '翻译', '你好',
113
+ // True simple intents: greeting, definition lookup, translation. Factual
114
+ // lookups ("who is", "when was", "capital of") were moved to RESEARCH below
115
+ // because they look easy but require external recall — sending them to
116
+ // SIMPLE-tier models reliably produces hallucinated subscriber counts,
117
+ // birth years, etc. that the post-hoc grounding check then has to flag.
118
+ 'define', 'translate', 'hello', 'yes or no', '翻译', '你好',
119
+ ];
120
+ // Research / fact-retrieval intent: questions whose correct answer depends
121
+ // on data the model can't reliably recall from weights — current statistics,
122
+ // latest news, comparisons, "best" rankings, identities of people/orgs.
123
+ // Bumping tier here pushes them to a MEDIUM/COMPLEX model that has
124
+ // WebSearch in its toolset, instead of letting a cheap text-only model
125
+ // fabricate plausible-looking numbers.
126
+ const RESEARCH_KEYWORDS = [
127
+ 'who is', 'who was', 'when was', 'when did', 'what is the capital',
128
+ 'how old', 'how many', 'how much',
129
+ 'best', 'top ', 'most popular', 'compare', 'vs ', ' vs.',
130
+ 'latest', 'current', 'recent', 'today', 'now',
131
+ 'subscribers', 'members', 'followers', 'market cap', 'price of',
132
+ '最好的', '最新', '最近', '现在', '当前', '排名', '对比',
115
133
  ];
116
134
  const TECHNICAL_KEYWORDS = [
117
135
  'algorithm', 'optimize', 'architecture', 'distributed', 'kubernetes',
@@ -180,6 +198,17 @@ function classifyRequest(prompt, tokenCount) {
180
198
  score -= 0.25;
181
199
  signals.push('simple');
182
200
  }
201
+ // Research / fact-lookup detection (weight: +0.30). Bumps tier upward so
202
+ // questions like "best subreddit", "current price of X", "how many members"
203
+ // route to a model that can actually call WebSearch instead of guessing
204
+ // from weights. Capped at one keyword's worth — research questions
205
+ // typically signal with one phrase, and stacking would push trivial
206
+ // questions into REASONING.
207
+ const researchMatches = countMatches(prompt, RESEARCH_KEYWORDS);
208
+ if (researchMatches >= 1) {
209
+ score += 0.30;
210
+ signals.push('research');
211
+ }
183
212
  // Technical complexity (weight: 0.15) - increased
184
213
  const techMatches = countMatches(prompt, TECHNICAL_KEYWORDS);
185
214
  if (techMatches >= 2) {
package/dist/ui/app.js CHANGED
@@ -204,6 +204,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
204
204
  const turnModelRef = useRef(undefined);
205
205
  const turnTierRef = useRef(undefined);
206
206
  const turnSavingsRef = useRef(undefined);
207
+ const turnCtxPctRef = useRef(undefined);
207
208
  const queuedInputsRef = useRef([]);
208
209
  // Keep refs in sync so memoized event handlers can read current values
209
210
  streamTextRef.current = streamText;
@@ -241,6 +242,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
241
242
  model: turnModelRef.current,
242
243
  tier: turnTierRef.current,
243
244
  savings: turnSavingsRef.current,
245
+ ctxPct: turnCtxPctRef.current,
244
246
  thinkMs,
245
247
  thinkChars,
246
248
  }];
@@ -423,6 +425,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
423
425
  turnModelRef.current = undefined;
424
426
  turnTierRef.current = undefined;
425
427
  turnSavingsRef.current = undefined;
428
+ turnCtxPctRef.current = undefined;
426
429
  setWaiting(true);
427
430
  setReady(false);
428
431
  // Pass through to agent loop to clear the actual conversation history
@@ -444,6 +447,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
444
447
  turnModelRef.current = undefined;
445
448
  turnTierRef.current = undefined;
446
449
  turnSavingsRef.current = undefined;
450
+ turnCtxPctRef.current = undefined;
447
451
  onSubmit(lastPrompt);
448
452
  return;
449
453
  default:
@@ -494,6 +498,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
494
498
  turnModelRef.current = undefined;
495
499
  turnTierRef.current = undefined;
496
500
  turnSavingsRef.current = undefined;
501
+ turnCtxPctRef.current = undefined;
497
502
  onSubmit(trimmed);
498
503
  }, [ready, currentModel, totalCost, onSubmit, onModelChange, onAbort, onExit, exit, lastPrompt, inputHistory, showStatus]);
499
504
  // Mouse support — OFF by default because Node stdin is shared: mouse escape
@@ -658,8 +663,10 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
658
663
  turnTierRef.current = event.tier;
659
664
  if (event.savings !== undefined)
660
665
  turnSavingsRef.current = event.savings;
661
- if (event.contextPct !== undefined)
666
+ if (event.contextPct !== undefined) {
662
667
  setContextPct(event.contextPct);
668
+ turnCtxPctRef.current = event.contextPct;
669
+ }
663
670
  break;
664
671
  }
665
672
  case 'turn_done': {
@@ -757,9 +764,13 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
757
764
  const isUserMsg = r.key.startsWith('user-');
758
765
  return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), isUserMsg && (_jsx(Box, { marginTop: 1 })), !isUserMsg && r.thinkMs !== undefined && r.thinkMs >= 500 && (_jsx(Box, { paddingLeft: 2, children: _jsxs(Text, { color: "magenta", dimColor: true, children: ["\u273B Thought for ", (r.thinkMs / 1000).toFixed(1), "s", r.thinkChars && r.thinkChars > 20
759
766
  ? ` · ~${Math.round(r.thinkChars / 4)} tokens`
760
- : ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier && _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] }), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
767
+ : ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier
768
+ ? _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] })
769
+ : (r.model ? _jsx(Text, { dimColor: true, children: "[direct] " }) : null), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
761
770
  ? `${r.tokens.calls} calls`
762
- : `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : ''] }) }))] }, r.key));
771
+ : `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : '', r.ctxPct !== undefined && r.ctxPct >= 5
772
+ ? _jsxs(Text, { color: r.ctxPct >= 80 ? 'red' : r.ctxPct >= 50 ? 'yellow' : undefined, dimColor: r.ctxPct < 50, children: [" \u00B7 ctx ", r.ctxPct, "%"] })
773
+ : ''] }) }))] }, r.key));
763
774
  } }), permissionRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "yellow", children: "\u256D\u2500 Permission required \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "yellow", children: ["\u2502 ", _jsx(Text, { bold: true, children: permissionRequest.toolName })] }), permissionRequest.description.split('\n').map((line, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", line] }, i))), _jsx(Text, { color: "yellow", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsx(Box, { marginLeft: 2, children: _jsxs(Text, { children: [_jsx(Text, { bold: true, color: "green", children: "[y]" }), _jsx(Text, { dimColor: true, children: " yes " }), _jsx(Text, { bold: true, color: "cyan", children: "[a]" }), _jsx(Text, { dimColor: true, children: " always " }), _jsx(Text, { bold: true, color: "red", children: "[n]" }), _jsx(Text, { dimColor: true, children: " no" })] }) })] })), askUserRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "cyan", children: "\u256D\u2500 Question \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "cyan", children: ["\u2502 ", _jsx(Text, { bold: true, children: askUserRequest.question })] }), askUserRequest.options && askUserRequest.options.length > 0 && (askUserRequest.options.map((opt, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", i + 1, ". ", opt] }, i)))), _jsx(Text, { color: "cyan", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Box, { marginLeft: 2, children: [_jsx(Text, { bold: true, children: "answer> " }), _jsx(TextInput, { value: askUserInput, onChange: setAskUserInput, onSubmit: (val) => {
764
775
  const answer = val.trim() || '(no response)';
765
776
  const r = askUserRequest.resolve;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.3",
3
+ "version": "3.15.4",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {