@blockrun/franklin 3.15.7 → 3.15.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/error-classifier.js +22 -0
- package/dist/agent/evaluator.js +30 -3
- package/dist/agent/loop.js +47 -2
- package/dist/tools/webfetch.js +98 -1
- package/package.json +1 -1
|
@@ -111,6 +111,28 @@ export function classifyAgentError(message) {
|
|
|
111
111
|
suggestion: 'The model is overloaded. Try /model to switch, or wait and /retry.',
|
|
112
112
|
};
|
|
113
113
|
}
|
|
114
|
+
// Reasoning / thinking-mode format errors — NOT transient.
|
|
115
|
+
// DeepSeek V4 family and similar thinking-enabled models reject requests
|
|
116
|
+
// when the message history's reasoning_content fields don't match the
|
|
117
|
+
// upstream's expected shape (typically: tool-call assistant messages must
|
|
118
|
+
// carry reasoning_content; non-tool-call ones must not, or vice versa).
|
|
119
|
+
// The fix is to drop the polluting history, not to swap models — every
|
|
120
|
+
// thinking-enabled model has the same constraint just with different
|
|
121
|
+
// specifics. /clear forces a fresh context that won't have the bad shape.
|
|
122
|
+
// Classified BEFORE the generic schema branch below so we surface the
|
|
123
|
+
// right suggestion.
|
|
124
|
+
if (includesAny(err, [
|
|
125
|
+
'reasoning_content',
|
|
126
|
+
'reasoning content',
|
|
127
|
+
'thinking mode must',
|
|
128
|
+
'message format incompatible',
|
|
129
|
+
'reasoning_format_error',
|
|
130
|
+
])) {
|
|
131
|
+
return {
|
|
132
|
+
category: 'schema', label: 'Schema', isTransient: false, maxRetries: 0,
|
|
133
|
+
suggestion: 'Thinking-mode history is incompatible with this model. Use /clear to reset and retry, or /model to switch to a non-thinking model.',
|
|
134
|
+
};
|
|
135
|
+
}
|
|
114
136
|
// Schema / tool-definition errors — NOT transient, retrying won't help.
|
|
115
137
|
// These can be wrapped in 5xx responses (e.g. '503: 400 Invalid schema'),
|
|
116
138
|
// so classify them BEFORE the generic server-error branch below.
|
package/dist/agent/evaluator.js
CHANGED
|
@@ -65,9 +65,36 @@ Flag as tool-use refusal:
|
|
|
65
65
|
|
|
66
66
|
VERDICT: GROUNDED | PARTIAL | UNGROUNDED
|
|
67
67
|
|
|
68
|
-
If not GROUNDED, list each issue on its own line starting with "- " and the tool that should have been called
|
|
69
|
-
|
|
70
|
-
|
|
68
|
+
If not GROUNDED, list each issue on its own line starting with "- " and the tool that should have been called.
|
|
69
|
+
|
|
70
|
+
## Picking the right tool — strict domain rules
|
|
71
|
+
|
|
72
|
+
**Default for any factual claim:** WebSearch or ExaSearch. These are the
|
|
73
|
+
right answer for the OVERWHELMING majority of "the model said a number it
|
|
74
|
+
didn't look up" cases — current events, statistics, prices for non-crypto
|
|
75
|
+
goods (real estate, retail, salaries), people, companies, news, etc.
|
|
76
|
+
|
|
77
|
+
**Use specialized tools ONLY when the claim's domain matches:**
|
|
78
|
+
- TradingMarket / TradingSignal — ONLY for cryptocurrency tickers (BTC, ETH, SOL, etc). Never for stocks, real estate, currencies, commodities outside crypto.
|
|
79
|
+
- DefiLlamaProtocol / DefiLlamaYields / DefiLlamaPrice — ONLY for DeFi protocols, TVL, yields, on-chain token prices.
|
|
80
|
+
- SearchX — ONLY for X.com / Twitter posts and accounts.
|
|
81
|
+
- ExaAnswer — research questions where you want a synthesized answer with citations.
|
|
82
|
+
- WebFetch — claims that quote a SPECIFIC URL the model already named.
|
|
83
|
+
|
|
84
|
+
**Anti-patterns to never produce:**
|
|
85
|
+
- Real-estate price → TradingMarket (TradingMarket is crypto-only — wrong domain)
|
|
86
|
+
- Stock ticker → TradingMarket (also crypto-only — use WebSearch instead)
|
|
87
|
+
- Generic news / statistics → TradingMarket (use WebSearch)
|
|
88
|
+
- Person's biography → TradingMarket (use WebSearch)
|
|
89
|
+
|
|
90
|
+
When unsure: name **WebSearch**. It's the safe default for factual grounding.
|
|
91
|
+
|
|
92
|
+
## Format examples
|
|
93
|
+
|
|
94
|
+
- Claim: "<the ungrounded part, quoted briefly>" → missing tool: WebSearch
|
|
95
|
+
- Claim: "BTC at $67k" → missing tool: TradingMarket
|
|
96
|
+
- Claim: "Westlake $/sqft is $719" → missing tool: WebSearch
|
|
97
|
+
- Refusal: "<the refusal phrase, quoted briefly>" → should have called: WebSearch
|
|
71
98
|
|
|
72
99
|
Empty line between verdict and list. No other text. No preamble. No apology. Be terse.`;
|
|
73
100
|
// ─── Trigger policy ──────────────────────────────────────────────────────
|
package/dist/agent/loop.js
CHANGED
|
@@ -241,6 +241,39 @@ export function looksLikeGatewayErrorAsText(parts) {
|
|
|
241
241
|
return { match: false, message: '' };
|
|
242
242
|
return { match: true, message: m[1].trim() };
|
|
243
243
|
}
|
|
244
|
+
/**
 * Domain check for the grounding-retry force-tool path. A specialized tool
 * (Trading*, DefiLlama*, Jupiter*, Base0x*, SearchX, PostToX, ImageGen,
 * VideoGen, MusicGen) should only be pinned by tool_choice when the user
 * prompt actually references that tool's domain — otherwise the caller lets
 * the generator pick from any tool.
 *
 * The motivating bug: a real-estate question (a Chinese-language prompt
 * asking whether a 20% discount could be negotiated) had its answer flagged
 * as ungrounded for citing $/sqft figures, and the evaluator named
 * TradingMarket (a crypto-only tool) as the missing tool. Forcing that tool
 * on a housing question made the retry useless.
 *
 * Keyword matching is split in two:
 *   - Latin keywords are matched with `\b` word boundaries so that e.g.
 *     "eth" does not fire inside "method".
 *   - CJK keywords are matched as plain substrings. JavaScript's `\b` is
 *     defined in terms of ASCII `\w`, so it never finds a boundary next to a
 *     CJK character that is surrounded by other CJK text, whitespace or
 *     punctuation; wrapping those keywords in `\b…\b` would make them
 *     effectively unmatchable. Substring matching can over-match (a single
 *     character may be part of an unrelated word), which only means the
 *     evaluator's pick is trusted slightly more often.
 *
 * @param {string} toolName    Tool name proposed by the evaluator.
 * @param {string} promptLower The user's prompt, already lower-cased.
 * @returns {boolean} true when the tool may be pinned; false when the tool
 *   is specialized and the prompt shows no matching domain keyword.
 */
function isToolRelevantToPrompt(toolName, promptLower) {
    // Defensive: a missing prompt is treated as "no keywords present".
    const prompt = typeof promptLower === 'string' ? promptLower : '';

    // Crypto trading / DeFi / swap tools — need a ticker, "crypto", "coin", "swap", etc.
    if (/^(Trading|DefiLlama|Jupiter|Base0x|Base0xGasless)/i.test(toolName)) {
        const cryptoLatin = /\b(btc|eth|sol|xrp|doge|usdc|usdt|crypto|coin|token|defi|tvl|yield|swap|jupiter|uniswap|pump\.fun|solana|base chain|polygon|ethereum)\b/i;
        const cryptoCjk = /币|代币|链上|做空|做多/;
        return cryptoLatin.test(prompt) || cryptoCjk.test(prompt);
    }

    // X.com tools — need an @handle, "twitter", "tweet", "x.com".
    if (/^SearchX$/i.test(toolName) || /^PostToX$/i.test(toolName)) {
        return /(@\w+|twitter|x\.com|tweet|推特)/i.test(prompt);
    }

    // Image / video / music generation — need a creative-content request.
    if (/^(ImageGen|VideoGen|MusicGen)$/i.test(toolName)) {
        const mediaLatin = /\b(image|picture|photo|video|clip|music|song|generate|create|render|draw)\b/i;
        const mediaCjk = /画|图|视频|音乐|歌/;
        return mediaLatin.test(prompt) || mediaCjk.test(prompt);
    }

    // Every other tool is treated as domain-agnostic and always passes.
    return true;
}
|
|
244
277
|
/**
|
|
245
278
|
* Calculate backoff delay with jitter to avoid thundering herd.
|
|
246
279
|
* Base: exponential (2^attempt * 1000ms), jitter: ±25%.
|
|
@@ -1349,11 +1382,23 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1349
1382
|
// Hard enforcement: set tool_choice so the model can't fabricate
|
|
1350
1383
|
// citations in lieu of running tools (the round-2 failure mode
|
|
1351
1384
|
// from the Tampa→Miami log). If the evaluator named exactly one
|
|
1352
|
-
// available tool
|
|
1385
|
+
// available tool AND that tool's domain matches the user's
|
|
1386
|
+
// prompt, pin to it; otherwise force "any" tool use and let
|
|
1387
|
+
// the generator pick the right one.
|
|
1388
|
+
//
|
|
1389
|
+
// Domain validation guards against the cheap evaluator model
|
|
1390
|
+
// hallucinating a wrong specialized tool (e.g., suggesting
|
|
1391
|
+
// TradingMarket for a real-estate question because the prompt
|
|
1392
|
+
// listed it as the first example tool). Specialized tools —
|
|
1393
|
+
// crypto trading, DeFi, swap quotes, X.com search — only get
|
|
1394
|
+
// pinned when their domain keywords appear in the user prompt;
|
|
1395
|
+
// otherwise we drop down to "any tool" and let the smart
|
|
1396
|
+
// generator model decide based on tool descriptions.
|
|
1353
1397
|
const namedTools = extractMissingToolNames(gResult);
|
|
1354
1398
|
const availableNames = new Set(buildCallToolDefs().map(t => t.name));
|
|
1355
1399
|
const matched = namedTools.filter(n => availableNames.has(n));
|
|
1356
|
-
|
|
1400
|
+
const promptForDomainCheck = (lastUserInput || '').toLowerCase();
|
|
1401
|
+
if (matched.length === 1 && isToolRelevantToPrompt(matched[0], promptForDomainCheck)) {
|
|
1357
1402
|
forceToolChoiceNextRound = { type: 'tool', name: matched[0] };
|
|
1358
1403
|
}
|
|
1359
1404
|
else if (availableNames.size > 0) {
|
package/dist/tools/webfetch.js
CHANGED
|
@@ -58,6 +58,22 @@ async function execute(input, ctx) {
|
|
|
58
58
|
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
|
59
59
|
return { output: `Error: only http/https URLs are supported`, isError: true };
|
|
60
60
|
}
|
|
61
|
+
// ── Pre-flight: known anti-bot domains ──
|
|
62
|
+
// Sites that systematically block scripted access return 403 / 429 /
|
|
63
|
+
// captcha challenges to plain GET requests no matter what UA we send.
|
|
64
|
+
// Without this guard the model burns multiple turns retrying variations
|
|
65
|
+
// (Zillow → /research/austin-tx, /homedetails/X, /sold/Y...) that all
|
|
66
|
+
// 403 the same way, padding the step counter and the user's bill.
|
|
67
|
+
// Short-circuiting here returns a single actionable error instead.
|
|
68
|
+
const blocked = isBlockedDomain(parsed.hostname);
|
|
69
|
+
if (blocked) {
|
|
70
|
+
return {
|
|
71
|
+
output: `${parsed.hostname} systematically blocks automated fetch (${blocked.reason}). ` +
|
|
72
|
+
`Switch tools: ${blocked.alternative}. Don't retry this URL with WebFetch — ` +
|
|
73
|
+
`every variant of the same hostname returns the same block.`,
|
|
74
|
+
isError: true,
|
|
75
|
+
};
|
|
76
|
+
}
|
|
61
77
|
const maxLen = Math.min(max_length ?? DEFAULT_MAX_LENGTH, MAX_BODY_BYTES);
|
|
62
78
|
// ── YouTube special case ──
|
|
63
79
|
// Plain HTML fetch on a youtube.com URL returns the SPA bundle (a wall of
|
|
@@ -108,8 +124,19 @@ async function execute(input, ctx) {
|
|
|
108
124
|
redirect: 'follow',
|
|
109
125
|
});
|
|
110
126
|
if (!response.ok) {
|
|
127
|
+
// 403 / 429 / 503 from a domain not in the static block list often still
|
|
128
|
+
// means anti-bot — many sites tier their detection (first hit OK,
|
|
129
|
+
// subsequent ones blocked) or rely on UA fingerprinting. Surface
|
|
130
|
+
// this as an actionable hint so the model switches strategy
|
|
131
|
+
// instead of retrying the same URL with a different path.
|
|
132
|
+
const isAntiBot = response.status === 403 || response.status === 429 ||
|
|
133
|
+
response.status === 503;
|
|
134
|
+
const hint = isAntiBot
|
|
135
|
+
? ` — ${parsed.hostname} likely blocks automated fetch. Try WebSearch for the same query, ` +
|
|
136
|
+
`or fetch a different domain that publishes the same data.`
|
|
137
|
+
: '';
|
|
111
138
|
return {
|
|
112
|
-
output: `HTTP ${response.status} ${response.statusText} for ${url}`,
|
|
139
|
+
output: `HTTP ${response.status} ${response.statusText} for ${url}${hint}`,
|
|
113
140
|
isError: true,
|
|
114
141
|
};
|
|
115
142
|
}
|
|
@@ -176,6 +203,76 @@ async function execute(input, ctx) {
|
|
|
176
203
|
ctx.abortSignal.removeEventListener('abort', onAbort);
|
|
177
204
|
}
|
|
178
205
|
}
|
|
206
|
+
/**
 * Static list of hosts for which a plain fetch is short-circuited.
 *
 * Each entry has:
 *   - pattern:     regex tested against the request hostname; anchored so it
 *                  matches the apex domain and any subdomain, but not a
 *                  different domain that merely ends with the same letters.
 *   - reason:      short explanation inserted into the error shown to the model.
 *   - alternative: what the model should do instead.
 */
const BLOCKED_DOMAINS = [
    {
        pattern: /(^|\.)zillow\.com$/i,
        reason: '403 to all non-browser GETs',
        alternative: 'use WebSearch for "Austin TX home price trends" or similar',
    },
    {
        pattern: /(^|\.)redfin\.com$/i,
        reason: '403 / captcha challenge to scripted requests',
        alternative: 'use WebSearch with the property address or zip code',
    },
    {
        pattern: /(^|\.)realtor\.com$/i,
        reason: '403 / interstitial to non-browser UAs',
        alternative: 'use WebSearch',
    },
    {
        pattern: /(^|\.)linkedin\.com$/i,
        reason: 'auth wall on every page',
        alternative: 'use SearchX (X is the better discovery surface for the same people) or WebSearch',
    },
    {
        pattern: /(^|\.)instagram\.com$/i,
        reason: 'auth wall + 401 to public profile fetches',
        alternative: 'use WebSearch for the username',
    },
    {
        pattern: /(^|\.)facebook\.com$/i,
        reason: 'auth wall on most public content',
        alternative: 'use WebSearch',
    },
    {
        pattern: /(^|\.)x\.com$/i,
        reason: 'X.com requires authenticated API',
        alternative: 'use SearchX (the dedicated X tool) instead of WebFetch',
    },
    {
        pattern: /(^|\.)twitter\.com$/i,
        reason: 'X.com requires authenticated API',
        alternative: 'use SearchX (the dedicated X tool) instead of WebFetch',
    },
    {
        pattern: /(^|\.)tiktok\.com$/i,
        reason: 'returns SPA shell + JS challenge',
        alternative: 'use WebSearch with the @username',
    },
    {
        pattern: /(^|\.)reuters\.com$/i,
        reason: 'paywall + bot detection',
        alternative: 'use WebSearch which surfaces cached headlines',
    },
    {
        pattern: /(^|\.)bloomberg\.com$/i,
        reason: 'paywall + bot detection',
        alternative: 'use WebSearch for the same story',
    },
    {
        pattern: /(^|\.)wsj\.com$/i,
        reason: 'paywall',
        alternative: 'use WebSearch for the same story',
    },
];

/**
 * Look up a hostname in BLOCKED_DOMAINS.
 *
 * A single trailing dot is stripped before matching: URL parsing keeps the
 * root dot of a fully-qualified name ("www.zillow.com."), and the patterns
 * are anchored with `$`, so without normalization such a hostname would slip
 * past the guard.
 *
 * @param {string} hostname Hostname of the URL about to be fetched.
 * @returns {{reason: string, alternative: string} | null} The first matching
 *   entry's reason and alternative, or null when the host is not listed (or
 *   the input is empty / not a string).
 */
function isBlockedDomain(hostname) {
    if (typeof hostname !== 'string' || hostname === '') {
        return null;
    }
    const normalized = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;
    // Patterns carry no `g`/`y` flag, so `.test` is stateless and safe to reuse.
    const entry = BLOCKED_DOMAINS.find(({ pattern }) => pattern.test(normalized));
    return entry ? { reason: entry.reason, alternative: entry.alternative } : null;
}
|
|
179
276
|
// ─── YouTube transcript fetcher ─────────────────────────────────────────────
|
|
180
277
|
// Fetches auto-generated or uploaded captions for a YouTube video by parsing
|
|
181
278
|
// the watch-page's `ytInitialPlayerResponse` JSON. Pure HTTP, no deps. Saves
|
package/package.json
CHANGED