@blockrun/franklin 3.15.2 → 3.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/evaluator.js +14 -4
- package/dist/router/index.js +31 -2
- package/dist/ui/app.js +60 -16
- package/package.json +1 -1
package/dist/agent/evaluator.js
CHANGED
|
@@ -289,13 +289,23 @@ function anySignal(signals) {
|
|
|
289
289
|
export function renderGroundingFollowup(result) {
|
|
290
290
|
if (result.verdict === 'GROUNDED' || result.verdict === 'SKIPPED')
|
|
291
291
|
return '';
|
|
292
|
+
// Headers state the situation directly. Old phrasing told the user to "re-run
|
|
293
|
+
// with the suggested tools" which both put the burden on them and exposed
|
|
294
|
+
// FRANKLIN_NO_EVAL as a one-flag escape hatch from the quality gate. New
|
|
295
|
+
// phrasing names the gap and offers a concrete next action.
|
|
292
296
|
const header = result.verdict === 'UNGROUNDED'
|
|
293
|
-
? '⚠️ **
|
|
294
|
-
: '⚠️ **
|
|
297
|
+
? '⚠️ **Unverified answer** — the model produced specific claims without calling any tool to back them up:'
|
|
298
|
+
: '⚠️ **Partial verification** — some claims in the answer aren\'t backed by tool output:';
|
|
295
299
|
const body = result.issues.length > 0
|
|
296
300
|
? result.issues.map(i => `- ${i}`).join('\n')
|
|
297
|
-
: '(evaluator returned no specific items — check the transcript manually)';
|
|
298
|
-
|
|
301
|
+
: '_(evaluator returned no specific items — check the transcript manually)_';
|
|
302
|
+
// Action line: tell the user exactly how to follow up, in their own voice.
|
|
303
|
+
// No env-var escape hatch in the user-facing text — that's a config concern,
|
|
304
|
+
// not a "make this warning go away" concern.
|
|
305
|
+
const action = result.verdict === 'UNGROUNDED'
|
|
306
|
+
? '\n\n_Reply "verify" to re-run with required tool use, or accept the answer as-is._'
|
|
307
|
+
: '\n\n_Reply "verify" to fact-check the flagged claims, or accept the answer as-is._';
|
|
308
|
+
return `\n\n${header}\n${body}${action}`;
|
|
299
309
|
}
|
|
300
310
|
/**
|
|
301
311
|
* Build a synthetic user message that instructs the agent to retry with the
|
package/dist/router/index.js
CHANGED
|
@@ -110,8 +110,26 @@ const REASONING_KEYWORDS = [
|
|
|
110
110
|
'formally', 'mathematical', 'proof', 'logically', '证明', '定理', '推导',
|
|
111
111
|
];
|
|
112
112
|
const SIMPLE_KEYWORDS = [
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
// True simple intents: greeting, definition lookup, translation. Factual
|
|
114
|
+
// lookups ("who is", "when was", "capital of") were moved to RESEARCH below
|
|
115
|
+
// because they look easy but require external recall — sending them to
|
|
116
|
+
// SIMPLE-tier models reliably produces hallucinated subscriber counts,
|
|
117
|
+
// birth years, etc. that the post-hoc grounding check then has to flag.
|
|
118
|
+
'define', 'translate', 'hello', 'yes or no', '翻译', '你好',
|
|
119
|
+
];
|
|
120
|
+
// Research / fact-retrieval intent: questions whose correct answer depends
|
|
121
|
+
// on data the model can't reliably recall from weights — current statistics,
|
|
122
|
+
// latest news, comparisons, "best" rankings, identities of people/orgs.
|
|
123
|
+
// Bumping tier here pushes them to a MEDIUM/COMPLEX model that has
|
|
124
|
+
// WebSearch in its toolset, instead of letting a cheap text-only model
|
|
125
|
+
// fabricate plausible-looking numbers.
|
|
126
|
+
const RESEARCH_KEYWORDS = [
|
|
127
|
+
'who is', 'who was', 'when was', 'when did', 'what is the capital',
|
|
128
|
+
'how old', 'how many', 'how much',
|
|
129
|
+
'best', 'top ', 'most popular', 'compare', 'vs ', ' vs.',
|
|
130
|
+
'latest', 'current', 'recent', 'today', 'now',
|
|
131
|
+
'subscribers', 'members', 'followers', 'market cap', 'price of',
|
|
132
|
+
'最好的', '最新', '最近', '现在', '当前', '排名', '对比',
|
|
115
133
|
];
|
|
116
134
|
const TECHNICAL_KEYWORDS = [
|
|
117
135
|
'algorithm', 'optimize', 'architecture', 'distributed', 'kubernetes',
|
|
@@ -180,6 +198,17 @@ function classifyRequest(prompt, tokenCount) {
|
|
|
180
198
|
score -= 0.25;
|
|
181
199
|
signals.push('simple');
|
|
182
200
|
}
|
|
201
|
+
// Research / fact-lookup detection (weight: +0.30). Bumps tier upward so
|
|
202
|
+
// questions like "best subreddit", "current price of X", "how many members"
|
|
203
|
+
// route to a model that can actually call WebSearch instead of guessing
|
|
204
|
+
// from weights. Capped at one keyword's worth — research questions
|
|
205
|
+
// typically signal with one phrase, and stacking would push trivial
|
|
206
|
+
// questions into REASONING.
|
|
207
|
+
const researchMatches = countMatches(prompt, RESEARCH_KEYWORDS);
|
|
208
|
+
if (researchMatches >= 1) {
|
|
209
|
+
score += 0.30;
|
|
210
|
+
signals.push('research');
|
|
211
|
+
}
|
|
183
212
|
// Technical complexity (weight: 0.15) - increased
|
|
184
213
|
const techMatches = countMatches(prompt, TECHNICAL_KEYWORDS);
|
|
185
214
|
if (techMatches >= 2) {
|
package/dist/ui/app.js
CHANGED
|
@@ -99,6 +99,11 @@ function formatAgentErrorForDisplay(error) {
|
|
|
99
99
|
}
|
|
100
100
|
function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain, startWithPicker, onSubmit, onModelChange, onAbort, onExit, }) {
|
|
101
101
|
const { exit } = useApp();
|
|
102
|
+
// Track terminal rows so we can cap the dynamic-region height. Ink wipes the
|
|
103
|
+
// terminal scrollback (via ansiEscapes.clearTerminal → \x1b[3J) whenever the
|
|
104
|
+
// dynamic output exceeds rows, so any tall live region (streaming text,
|
|
105
|
+
// model picker) must be windowed to preserve "scroll to the start" history.
|
|
106
|
+
const { rows: termRows } = useTerminalSize();
|
|
102
107
|
const [input, setInput] = useState('');
|
|
103
108
|
const [streamText, setStreamText] = useState('');
|
|
104
109
|
const [thinking, setThinking] = useState(false);
|
|
@@ -199,6 +204,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
199
204
|
const turnModelRef = useRef(undefined);
|
|
200
205
|
const turnTierRef = useRef(undefined);
|
|
201
206
|
const turnSavingsRef = useRef(undefined);
|
|
207
|
+
const turnCtxPctRef = useRef(undefined);
|
|
202
208
|
const queuedInputsRef = useRef([]);
|
|
203
209
|
// Keep refs in sync so memoized event handlers can read current values
|
|
204
210
|
streamTextRef.current = streamText;
|
|
@@ -236,6 +242,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
236
242
|
model: turnModelRef.current,
|
|
237
243
|
tier: turnTierRef.current,
|
|
238
244
|
savings: turnSavingsRef.current,
|
|
245
|
+
ctxPct: turnCtxPctRef.current,
|
|
239
246
|
thinkMs,
|
|
240
247
|
thinkChars,
|
|
241
248
|
}];
|
|
@@ -418,6 +425,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
418
425
|
turnModelRef.current = undefined;
|
|
419
426
|
turnTierRef.current = undefined;
|
|
420
427
|
turnSavingsRef.current = undefined;
|
|
428
|
+
turnCtxPctRef.current = undefined;
|
|
421
429
|
setWaiting(true);
|
|
422
430
|
setReady(false);
|
|
423
431
|
// Pass through to agent loop to clear the actual conversation history
|
|
@@ -439,6 +447,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
439
447
|
turnModelRef.current = undefined;
|
|
440
448
|
turnTierRef.current = undefined;
|
|
441
449
|
turnSavingsRef.current = undefined;
|
|
450
|
+
turnCtxPctRef.current = undefined;
|
|
442
451
|
onSubmit(lastPrompt);
|
|
443
452
|
return;
|
|
444
453
|
default:
|
|
@@ -489,6 +498,7 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
489
498
|
turnModelRef.current = undefined;
|
|
490
499
|
turnTierRef.current = undefined;
|
|
491
500
|
turnSavingsRef.current = undefined;
|
|
501
|
+
turnCtxPctRef.current = undefined;
|
|
492
502
|
onSubmit(trimmed);
|
|
493
503
|
}, [ready, currentModel, totalCost, onSubmit, onModelChange, onAbort, onExit, exit, lastPrompt, inputHistory, showStatus]);
|
|
494
504
|
// Mouse support — OFF by default because Node stdin is shared: mouse escape
|
|
@@ -653,8 +663,10 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
653
663
|
turnTierRef.current = event.tier;
|
|
654
664
|
if (event.savings !== undefined)
|
|
655
665
|
turnSavingsRef.current = event.savings;
|
|
656
|
-
if (event.contextPct !== undefined)
|
|
666
|
+
if (event.contextPct !== undefined) {
|
|
657
667
|
setContextPct(event.contextPct);
|
|
668
|
+
turnCtxPctRef.current = event.contextPct;
|
|
669
|
+
}
|
|
658
670
|
break;
|
|
659
671
|
}
|
|
660
672
|
case 'turn_done': {
|
|
@@ -752,16 +764,20 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
752
764
|
const isUserMsg = r.key.startsWith('user-');
|
|
753
765
|
return (_jsxs(Box, { flexDirection: "column", children: [!isUserMsg && (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { dimColor: true, children: '─'.repeat(60) }) })), isUserMsg && (_jsx(Box, { marginTop: 1 })), !isUserMsg && r.thinkMs !== undefined && r.thinkMs >= 500 && (_jsx(Box, { paddingLeft: 2, children: _jsxs(Text, { color: "magenta", dimColor: true, children: ["\u273B Thought for ", (r.thinkMs / 1000).toFixed(1), "s", r.thinkChars && r.thinkChars > 20
|
|
754
766
|
? ` · ~${Math.round(r.thinkChars / 4)} tokens`
|
|
755
|
-
: ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier
|
|
767
|
+
: ''] }) })), _jsx(Box, { paddingLeft: isUserMsg ? 0 : 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(r.text) }) }), (r.tokens.input > 0 || r.tokens.output > 0) && (_jsx(Box, { marginLeft: 2, marginBottom: 1, children: _jsxs(Text, { dimColor: true, children: [r.tier
|
|
768
|
+
? _jsxs(Text, { color: "cyan", children: ["[", r.tier, "] "] })
|
|
769
|
+
: (r.model ? _jsx(Text, { dimColor: true, children: "[direct] " }) : null), r.model ? shortModelName(r.model) : '', r.model ? ' · ' : '', r.tokens.calls > 0 && r.tokens.input === 0
|
|
756
770
|
? `${r.tokens.calls} calls`
|
|
757
|
-
: `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : ''
|
|
771
|
+
: `${formatTokens(r.tokens.input)} in / ${formatTokens(r.tokens.output)} out`, r.cost > 0 ? ` · $${r.cost.toFixed(4)}` : '', r.savings !== undefined && r.savings > 0 ? _jsxs(Text, { color: "green", children: [" saved ", Math.round(r.savings * 100), "%"] }) : '', r.ctxPct !== undefined && r.ctxPct >= 5
|
|
772
|
+
? _jsxs(Text, { color: r.ctxPct >= 80 ? 'red' : r.ctxPct >= 50 ? 'yellow' : undefined, dimColor: r.ctxPct < 50, children: [" \u00B7 ctx ", r.ctxPct, "%"] })
|
|
773
|
+
: ''] }) }))] }, r.key));
|
|
758
774
|
} }), permissionRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "yellow", children: "\u256D\u2500 Permission required \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "yellow", children: ["\u2502 ", _jsx(Text, { bold: true, children: permissionRequest.toolName })] }), permissionRequest.description.split('\n').map((line, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", line] }, i))), _jsx(Text, { color: "yellow", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsx(Box, { marginLeft: 2, children: _jsxs(Text, { children: [_jsx(Text, { bold: true, color: "green", children: "[y]" }), _jsx(Text, { dimColor: true, children: " yes " }), _jsx(Text, { bold: true, color: "cyan", children: "[a]" }), _jsx(Text, { dimColor: true, children: " always " }), _jsx(Text, { bold: true, color: "red", children: "[n]" }), _jsx(Text, { dimColor: true, children: " no" })] }) })] })), askUserRequest && (_jsxs(Box, { flexDirection: "column", marginTop: 1, marginLeft: 2, children: [_jsx(Text, { color: "cyan", children: "\u256D\u2500 Question \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Text, { color: "cyan", children: ["\u2502 ", _jsx(Text, { bold: true, children: askUserRequest.question })] }), askUserRequest.options && askUserRequest.options.length > 0 && (askUserRequest.options.map((opt, i) => (_jsxs(Text, { dimColor: true, children: ["\u2502 ", i + 1, ". ", opt] }, i)))), _jsx(Text, { color: "cyan", children: "\u2570\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" }), _jsxs(Box, { marginLeft: 2, children: [_jsx(Text, { bold: true, children: "answer> " }), _jsx(TextInput, { value: askUserInput, onChange: setAskUserInput, onSubmit: (val) => {
|
|
759
775
|
const answer = val.trim() || '(no response)';
|
|
760
776
|
const r = askUserRequest.resolve;
|
|
761
777
|
setAskUserRequest(null);
|
|
762
778
|
setAskUserInput('');
|
|
763
779
|
r(answer);
|
|
764
|
-
}, focus: true })] })] })), expandableTool && (() => {
|
|
780
|
+
}, focus: true })] })] })), expandableTool && !permissionRequest && !askUserRequest && (() => {
|
|
765
781
|
const tool = expandableTool;
|
|
766
782
|
const elapsedFmt = tool.elapsed >= 1000
|
|
767
783
|
? `${(tool.elapsed / 1000).toFixed(1)}s`
|
|
@@ -776,18 +792,46 @@ function RunCodeApp({ initialModel, workDir, walletAddress, walletBalance, chain
|
|
|
776
792
|
const lines = thinkingText.split('\n').filter(Boolean).slice(-3);
|
|
777
793
|
return (_jsx(Box, { flexDirection: "column", marginLeft: 2, children: lines.map((line, i) => (_jsxs(Text, { dimColor: true, wrap: "truncate-end", children: ['⎿ ', line.slice(0, 120)] }, i))) }));
|
|
778
794
|
})()] })), waiting && !thinking && tools.size === 0 && (_jsx(Box, { marginLeft: 2, children: _jsxs(Text, { color: "yellow", children: [_jsx(Spinner, { type: "dots" }), ' ', _jsxs(Text, { dimColor: true, children: [shortModelName(currentModel), completedTools.length > 0 ? ` · step ${completedTools.length + 1}` : ''] })] }) })), streamText && (() => {
|
|
779
|
-
const
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
795
|
+
const maxLines = Math.max(8, termRows - 12);
|
|
796
|
+
const lines = streamText.split('\n');
|
|
797
|
+
const truncated = lines.length > maxLines;
|
|
798
|
+
const visible = truncated ? lines.slice(-maxLines).join('\n') : streamText;
|
|
799
|
+
const { rendered, partial } = renderMarkdownStreaming(visible);
|
|
800
|
+
return (_jsxs(Box, { flexDirection: "column", marginTop: 0, marginBottom: 0, marginLeft: 2, children: [truncated && (_jsxs(Text, { dimColor: true, children: ["\u2191 ", lines.length - maxLines, " earlier line", lines.length - maxLines === 1 ? '' : 's', " \u2014 full response will appear in scrollback when this turn finishes"] })), _jsxs(Text, { wrap: "wrap", children: [rendered, rendered && partial ? '\n' : '', partial] })] }));
|
|
801
|
+
})(), responsePreview && !streamText && !permissionRequest && !askUserRequest && (_jsx(Box, { flexDirection: "column", marginBottom: 0, marginLeft: 2, children: _jsx(Text, { wrap: "wrap", children: renderMarkdown(responsePreview) }) })), inPicker && (() => {
|
|
802
|
+
const totalModels = PICKER_MODELS_FLAT.length;
|
|
803
|
+
const maxModels = Math.max(6, termRows - 12);
|
|
804
|
+
let start = Math.max(0, pickerIdx - Math.floor(maxModels / 2));
|
|
805
|
+
let end = Math.min(totalModels, start + maxModels);
|
|
806
|
+
// Expand window backward if we hit the bottom of the list, so we
|
|
807
|
+
// always fill `maxModels` rows when the list is long enough.
|
|
808
|
+
if (end - start < maxModels)
|
|
809
|
+
start = Math.max(0, end - maxModels);
|
|
810
|
+
const hiddenAbove = start;
|
|
811
|
+
const hiddenBelow = totalModels - end;
|
|
812
|
+
// Pre-compute each category's base offset into the flat model list so
|
|
813
|
+
// we can map (cat, localIdx) → globalIdx in one pass without re-walking.
|
|
814
|
+
let cursor = 0;
|
|
815
|
+
const catBases = PICKER_CATEGORIES.map((cat) => {
|
|
816
|
+
const base = cursor;
|
|
817
|
+
cursor += cat.models.length;
|
|
818
|
+
return base;
|
|
819
|
+
});
|
|
820
|
+
return (_jsxs(Box, { flexDirection: "column", marginTop: 1, children: [_jsxs(Box, { marginLeft: 2, children: [_jsx(Text, { bold: true, children: "Select a model " }), _jsx(Text, { dimColor: true, children: "(\u2191\u2193 navigate, Enter select, Esc cancel)" })] }), hiddenAbove > 0 && (_jsx(Box, { marginLeft: 2, marginTop: 1, children: _jsxs(Text, { dimColor: true, children: ["\u2191 ", hiddenAbove, " more above"] }) })), PICKER_CATEGORIES.map((cat, catIdx) => {
|
|
821
|
+
const base = catBases[catIdx];
|
|
822
|
+
const visible = cat.models
|
|
823
|
+
.map((m, localIdx) => ({ m, globalIdx: base + localIdx }))
|
|
824
|
+
.filter(({ globalIdx }) => globalIdx >= start && globalIdx < end);
|
|
825
|
+
if (visible.length === 0)
|
|
826
|
+
return null;
|
|
827
|
+
return (_jsxs(Box, { flexDirection: "column", marginTop: 1, children: [_jsx(Box, { marginLeft: 2, children: _jsxs(Text, { dimColor: true, children: ["\u2500\u2500 ", cat.category, " \u2500\u2500"] }) }), visible.map(({ m, globalIdx }) => {
|
|
828
|
+
const isSelected = globalIdx === pickerIdx;
|
|
829
|
+
const isCurrent = m.id === currentModel;
|
|
830
|
+
const isHighlight = m.highlight === true;
|
|
831
|
+
return (_jsxs(Box, { marginLeft: 2, children: [_jsxs(Text, { inverse: isSelected, color: isSelected ? 'cyan' : isHighlight ? 'yellow' : undefined, bold: isSelected || isHighlight, children: [' ', m.label.padEnd(26), ' '] }), _jsxs(Text, { dimColor: true, children: [" ", m.shortcut.padEnd(14)] }), _jsx(Text, { color: m.price === 'FREE' ? 'green' : isHighlight ? 'yellow' : undefined, dimColor: !isHighlight && m.price !== 'FREE', children: m.price }), isCurrent && _jsx(Text, { color: "green", children: " \u2190" })] }, m.id));
|
|
832
|
+
})] }, cat.category));
|
|
833
|
+
}), hiddenBelow > 0 && (_jsx(Box, { marginLeft: 2, marginTop: 1, children: _jsxs(Text, { dimColor: true, children: ["\u2193 ", hiddenBelow, " more below"] }) })), _jsx(Box, { marginTop: 1, marginLeft: 2, children: _jsx(Text, { dimColor: true, children: "Your conversation stays above \u2014 picking a model keeps all history intact." }) })] }));
|
|
834
|
+
})(), !inPicker && !permissionRequest && !askUserRequest && (_jsx(InputBox, { input: input, setInput: setInput, onSubmit: handleSubmit, model: currentModel, balance: liveBalance, chain: chain, walletTail: walletAddress && walletAddress.length >= 4 && !walletAddress.startsWith('not set') ? walletAddress.slice(-4) : undefined, sessionCost: totalCost, queued: queuedInputs[0] || undefined, queuedCount: queuedInputs.length, focused: !permissionRequest && !askUserRequest, busy: !askUserRequest && (waiting || thinking || tools.size > 0), contextPct: contextPct, vimMode: vimEnabled, onVimModeChange: setCurrentVimMode }))] }));
|
|
791
835
|
}
|
|
792
836
|
export function launchInkUI(opts) {
|
|
793
837
|
let resolveInput = null;
|
package/package.json
CHANGED