@blockrun/franklin 3.15.3 → 3.15.4
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/agent/evaluator.js +14 -4
- package/dist/router/index.js +31 -2
- package/dist/ui/app.js +14 -3
- package/package.json +1 -1
package/dist/agent/evaluator.js
CHANGED
|
@@ -289,13 +289,23 @@ function anySignal(signals) {
|
|
|
289
289
|
export function renderGroundingFollowup(result) {
|
|
290
290
|
if (result.verdict === 'GROUNDED' || result.verdict === 'SKIPPED')
|
|
291
291
|
return '';
|
|
292
|
+
// Headers state the situation directly. Old phrasing told the user to "re-run
|
|
293
|
+
// with the suggested tools" which both put the burden on them and exposed
|
|
294
|
+
// FRANKLIN_NO_EVAL as a one-flag escape hatch from the quality gate. New
|
|
295
|
+
// phrasing names the gap and offers a concrete next action.
|
|
292
296
|
const header = result.verdict === 'UNGROUNDED'
|
|
293
|
-
? '⚠️ **
|
|
294
|
-
: '⚠️ **
|
|
297
|
+
? '⚠️ **Unverified answer** — the model produced specific claims without calling any tool to back them up:'
|
|
298
|
+
: '⚠️ **Partial verification** — some claims in the answer aren\'t backed by tool output:';
|
|
295
299
|
const body = result.issues.length > 0
|
|
296
300
|
? result.issues.map(i => `- ${i}`).join('\n')
|
|
297
|
-
: '(evaluator returned no specific items — check the transcript manually)';
|
|
298
|
-
|
|
301
|
+
: '_(evaluator returned no specific items — check the transcript manually)_';
|
|
302
|
+
// Action line: tell the user exactly how to follow up, in their own voice.
|
|
303
|
+
// No env-var escape hatch in the user-facing text — that's a config concern,
|
|
304
|
+
// not a "make this warning go away" concern.
|
|
305
|
+
const action = result.verdict === 'UNGROUNDED'
|
|
306
|
+
? '\n\n_Reply "verify" to re-run with required tool use, or accept the answer as-is._'
|
|
307
|
+
: '\n\n_Reply "verify" to fact-check the flagged claims, or accept the answer as-is._';
|
|
308
|
+
return `\n\n${header}\n${body}${action}`;
|
|
299
309
|
}
|
|
300
310
|
/**
|
|
301
311
|
* Build a synthetic user message that instructs the agent to retry with the
|
package/dist/router/index.js
CHANGED
|
@@ -110,8 +110,26 @@ const REASONING_KEYWORDS = [
|
|
|
110
110
|
'formally', 'mathematical', 'proof', 'logically', '证明', '定理', '推导',
|
|
111
111
|
];
|
|
112
112
|
const SIMPLE_KEYWORDS = [
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
// True simple intents: greeting, definition lookup, translation. Factual
|
|
114
|
+
// lookups ("who is", "when was", "capital of") were moved to RESEARCH below
|
|
115
|
+
// because they look easy but require external recall — sending them to
|
|
116
|
+
// SIMPLE-tier models reliably produces hallucinated subscriber counts,
|
|
117
|
+
// birth years, etc. that the post-hoc grounding check then has to flag.
|
|
118
|
+
'define', 'translate', 'hello', 'yes or no', '翻译', '你好',
|
|
119
|
+
];
|
|
120
|
+
// Research / fact-retrieval intent: questions whose correct answer depends
|
|
121
|
+
// on data the model can't reliably recall from weights — current statistics,
|
|
122
|
+
// latest news, comparisons, "best" rankings, identities of people/orgs.
|
|
123
|
+
// Bumping tier here pushes them to a MEDIUM/COMPLEX model that has
|
|
124
|
+
// WebSearch in its toolset, instead of letting a cheap text-only model
|
|
125
|
+
// fabricate plausible-looking numbers.
|
|
126
|
+
const RESEARCH_KEYWORDS = [
|
|
127
|
+
'who is', 'who was', 'when was', 'when did', 'what is the capital',
|
|
128
|
+
'how old', 'how many', 'how much',
|
|
129
|
+
'best', 'top ', 'most popular', 'compare', 'vs ', ' vs.',
|
|
130
|
+
'latest', 'current', 'recent', 'today', 'now',
|
|
131
|
+
'subscribers', 'members', 'followers', 'market cap', 'price of',
|
|
132
|
+
'最好的', '最新', '最近', '现在', '当前', '排名', '对比',
|
|
115
133
|
];
|
|
116
134
|
const TECHNICAL_KEYWORDS = [
|
|
117
135
|
'algorithm', 'optimize', 'architecture', 'distributed', 'kubernetes',
|
|
@@ -180,6 +198,17 @@ function classifyRequest(prompt, tokenCount) {
|
|
|
180
198
|
score -= 0.25;
|
|
181
199
|
signals.push('simple');
|
|
182
200
|
}
|
|
201
|
+
// Research / fact-lookup detection (weight: +0.30). Bumps tier upward so
|
|
202
|
+
// questions like "best subreddit", "current price of X", "how many members"
|
|
203
|
+
// route to a model that can actually call WebSearch instead of guessing
|
|
204
|
+
// from weights. Capped at one keyword's worth — research questions
|
|
205
|
+
// typically signal with one phrase, and stacking would push trivial
|
|
206
|
+
// questions into REASONING.
|
|
207
|
+
const researchMatches = countMatches(prompt, RESEARCH_KEYWORDS);
|
|
208
|
+
if (researchMatches >= 1) {
|
|
209
|
+
score += 0.30;
|
|
210
|
+
signals.push('research');
|
|
211
|
+
}
|
|
183
212
|
// Technical complexity (weight: 0.15) - increased
|
|
184
213
|
const techMatches = countMatches(prompt, TECHNICAL_KEYWORDS);
|
|
185
214
|
if (techMatches >= 2) {
|
package/dist/ui/app.js
CHANGED
|
@@ -204,6 +204,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
204
204
|
const turnModelRef = useRef(undefined);
|
|
205
205
|
const turnTierRef = useRef(undefined);
|
|
206
206
|
const turnSavingsRef = useRef(undefined);
|
|
207
|
+
const turnCtxPctRef = useRef(undefined);
|
|
207
208
|
const queuedInputsRef = useRef([]);
|
|
208
209
|
// Keep refs in sync so memoized event handlers can read current values
|
|
209
210
|
streamTextRef.current = streamText;
|
|
@@ -241,6 +242,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
241
242
|
model: turnModelRef.current,
|
|
242
243
|
tier: turnTierRef.current,
|
|
243
244
|
savings: turnSavingsRef.current,
|
|
245
|
+
ctxPct: turnCtxPctRef.current,
|
|
244
246
|
thinkMs,
|
|
245
247
|
thinkChars,
|
|
246
248
|
}];
|
|
@@ -423,6 +425,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
423
425
|
turnModelRef.current = undefined;
|
|
424
426
|
turnTierRef.current = undefined;
|
|
425
427
|
turnSavingsRef.current = undefined;
|
|
428
|
+
turnCtxPctRef.current = undefined;
|
|
426
429
|
setWaiting(true);
|
|
427
430
|
setReady(false);
|
|
428
431
|
// Pass through to agent loop to clear the actual conversation history
|
|
@@ -444,6 +447,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
444
447
|
turnModelRef.current = undefined;
|
|
445
448
|
turnTierRef.current = undefined;
|
|
446
449
|
turnSavingsRef.current = undefined;
|
|
450
|
+
turnCtxPctRef.current = undefined;
|
|
447
451
|
onSubmit(lastPrompt);
|
|
448
452
|
return;
|
|
449
453
|
default:
|
|
@@ -494,6 +498,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
494
498
|
turnModelRef.current = undefined;
|
|
495
499
|
turnTierRef.current = undefined;
|
|
496
500
|
turnSavingsRef.current = undefined;
|
|
501
|
+
turnCtxPctRef.current = undefined;
|
|
497
502
|
onSubmit(trimmed);
|
|
498
503
|
}, [ready, currentModel, totalCost, onSubmit, onModelChange, onAbort, onExit, exit, lastPrompt, inputHistory, showStatus]);
|
|
499
504
|
// Mouse support — OFF by default because Node stdin is shared: mouse escape
|
|
@@ -658,8 +663,10 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
658
663
|
turnTierRef.current = event.tier;
|
|
659
664
|
if (event.savings !== undefined)
|
|
660
665
|
turnSavingsRef.current = event.savings;
|
|
661
|
-
if (event.contextPct !== undefined)
|
|
666
|
+
if (event.contextPct !== undefined) {
|
|
662
667
|
setContextPct(event.contextPct);
|
|
668
|
+
turnCtxPctRef.current = event.contextPct;
|
|
669
|
+
}
|
|
663
670
|
break;
|
|
664
671
|
}
|
|
665
672
|
case 'turn_done': {
|
|
@@ -757,9 +764,13 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
757
764
|
const isUserMsg = r.key.startsWith('user-');
|
|
758
765
|
return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), isUserMsg && (_jsx(Box, { marginTop: 1 })), !isUserMsg && r.thinkMs !== undefined && r.thinkMs >= 500 && (_jsx(Box, { paddingLeft: 2, children: _jsxs(Text, { color: "magenta", dimColor: true, children: ["\u273B Thought for ", (r.thinkMs / 1000).toFixed(1), "s", r.thinkChars && r.thinkChars > 20
|
|
759
766
|
? ` · ~${Math.round(r.thinkChars / 4)} tokens`
|
|
760
|
-
: ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier
|
|
767
|
+
: ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier
|
|
768
|
+
? _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] })
|
|
769
|
+
: (r.model ? _jsx(Text, { dimColor: true, children: "[direct] " }) : null), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
|
|
761
770
|
? `${r.tokens.calls} calls`
|
|
762
|
-
: `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : ''
|
|
771
|
+
: `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : '', r.ctxPct !== undefined && r.ctxPct >= 5
|
|
772
|
+
? _jsxs(Text, { color: r.ctxPct >= 80 ? 'red' : r.ctxPct >= 50 ? 'yellow' : undefined, dimColor: r.ctxPct < 50, children: [" \u00B7 ctx ", r.ctxPct, "%"] })
|
|
773
|
+
: ''] }) }))] }, r.key));
|
|
763
774
|
} }), permissionRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "yellow", children: "\u256D\u2500 Permission required \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "yellow", children: ["\u2502 ", _jsx(Text, { bold: true, children: permissionRequest.toolName })] }), permissionRequest.description.split('\n').map((line, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", line] }, i))), _jsx(Text, { color: "yellow", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsx(Box, { marginLeft: 2, children: _jsxs(Text, { children: [_jsx(Text, { bold: true, color: "green", children: "[y]" }), _jsx(Text, { dimColor: true, children: " yes " }), _jsx(Text, { bold: true, color: "cyan", children: "[a]" }), _jsx(Text, { dimColor: true, children: " always " }), _jsx(Text, { bold: true, color: "red", children: "[n]" }), _jsx(Text, { dimColor: true, children: " no" })] }) })] })), askUserRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "cyan", children: "\u256D\u2500 Question \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "cyan", children: ["\u2502 ", _jsx(Text, { bold: true, children: askUserRequest.question })] }), askUserRequest.options && askUserRequest.options.length > 0 && (askUserRequest.options.map((opt, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", i + 1, ". 
", opt] }, i)))), _jsx(Text, { color: "cyan", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Box, { marginLeft: 2, children: [_jsx(Text, { bold: true, children: "answer> " }), _jsx(TextInput, { value: askUserInput, onChange: setAskUserInput, onSubmit: (val) => {
|
|
764
775
|
const answer = val.trim() || '(no response)';
|
|
765
776
|
const r = askUserRequest.resolve;
|
package/package.json
CHANGED