@blockrun/franklin 3.15.6 → 3.15.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +41 -2
- package/dist/tools/webfetch.js +98 -1
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -22,7 +22,7 @@ import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
|
22
22
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
23
23
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
24
24
|
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
25
|
-
import { routeRequestAsync, resolveTierToModel, parseRoutingProfile } from '../router/index.js';
|
|
25
|
+
import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain } from '../router/index.js';
|
|
26
26
|
import { recordOutcome } from '../router/local-elo.js';
|
|
27
27
|
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
28
28
|
import { shouldVerify, runVerification } from './verification.js';
|
|
@@ -505,6 +505,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
505
505
|
let recoveryAttempts = 0;
|
|
506
506
|
let autoContinuationCount = 0;
|
|
507
507
|
const MAX_RECOVERY_ATTEMPTS = 5;
|
|
508
|
+
// Track per-model server-error streak so we can break out of a stuck
|
|
509
|
+
// upstream and try the next model in the routing fallback chain instead
|
|
510
|
+
// of burning all MAX_RECOVERY_ATTEMPTS retries on the same failure.
|
|
511
|
+
const serverErrorsByModel = new Map();
|
|
512
|
+
const SERVER_ERROR_STREAK_BEFORE_SWITCH = 2;
|
|
508
513
|
let compactFailures = 0;
|
|
509
514
|
let maxTokensOverride;
|
|
510
515
|
const turnIdleReference = lastSessionActivity;
|
|
@@ -993,14 +998,48 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
993
998
|
}
|
|
994
999
|
}
|
|
995
1000
|
if (classified.isTransient && recoveryAttempts < effectiveMaxRetries) {
|
|
1001
|
+
// Server-error streak guard: if the same model 5xx's twice in a row
|
|
1002
|
+
// it's almost always an upstream incident, not a blip. Switch to
|
|
1003
|
+
// the next routing fallback instead of waiting out 5 backoffs on a
|
|
1004
|
+
// dead provider — same idea as the payment-failure auto-fallback
|
|
1005
|
+
// below, but for transient server errors. Skipped for non-server
|
|
1006
|
+
// transients (rate limits, network blips) where retry is the right
|
|
1007
|
+
// call. Also skipped when the user picked a concrete model — they
|
|
1008
|
+
// explicitly chose this one, so we shouldn't silently swap.
|
|
1009
|
+
if (classified.category === 'server' && parseRoutingProfile(config.model)) {
|
|
1010
|
+
const streak = (serverErrorsByModel.get(resolvedModel) ?? 0) + 1;
|
|
1011
|
+
serverErrorsByModel.set(resolvedModel, streak);
|
|
1012
|
+
if (streak >= SERVER_ERROR_STREAK_BEFORE_SWITCH) {
|
|
1013
|
+
const fallbackChain = getFallbackChain(routingTier ?? 'MEDIUM', parseRoutingProfile(config.model) ?? 'auto');
|
|
1014
|
+
const nextModel = fallbackChain.find(m => m !== resolvedModel && (serverErrorsByModel.get(m) ?? 0) < SERVER_ERROR_STREAK_BEFORE_SWITCH);
|
|
1015
|
+
if (nextModel) {
|
|
1016
|
+
config.model = nextModel;
|
|
1017
|
+
config.onModelChange?.(nextModel, 'system');
|
|
1018
|
+
recoveryAttempts = 0;
|
|
1019
|
+
onEvent({
|
|
1020
|
+
kind: 'text_delta',
|
|
1021
|
+
text: `\n*${resolvedModel} keeps 5xx'ing (${streak} in a row) — switching to ${nextModel}*\n`,
|
|
1022
|
+
});
|
|
1023
|
+
continue;
|
|
1024
|
+
}
|
|
1025
|
+
// No alternative left in the fallback chain — fall through to
|
|
1026
|
+
// the normal retry path so we at least exhaust attempts before
|
|
1027
|
+
// surrender.
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
996
1030
|
recoveryAttempts++;
|
|
997
1031
|
const backoffMs = getBackoffDelay(recoveryAttempts);
|
|
998
1032
|
if (config.debug) {
|
|
999
1033
|
console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
|
|
1000
1034
|
}
|
|
1035
|
+
// Surface the actual error + model so the user can see which model
|
|
1036
|
+
// is failing and what the upstream said. Old "Retrying after Server
|
|
1037
|
+
// error" was uninformative — users couldn't tell whether to wait,
|
|
1038
|
+
// /retry, or /model-switch.
|
|
1039
|
+
const errSnippet = errMsg.replace(/\s+/g, ' ').slice(0, 100);
|
|
1001
1040
|
onEvent({
|
|
1002
1041
|
kind: 'text_delta',
|
|
1003
|
-
text: `\n*Retrying
|
|
1042
|
+
text: `\n*Retrying ${recoveryAttempts}/${effectiveMaxRetries} on ${resolvedModel} — ${classified.label}: ${errSnippet}*\n`,
|
|
1004
1043
|
});
|
|
1005
1044
|
await new Promise(r => setTimeout(r, backoffMs));
|
|
1006
1045
|
continue;
|
package/dist/tools/webfetch.js
CHANGED
|
@@ -58,6 +58,22 @@ async function execute(input, ctx) {
|
|
|
58
58
|
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
|
59
59
|
return { output: `Error: only http/https URLs are supported`, isError: true };
|
|
60
60
|
}
|
|
61
|
+
// ── Pre-flight: known anti-bot domains ──
|
|
62
|
+
// Sites that systematically block scripted access return 403 / 429 /
|
|
63
|
+
// captcha challenges to plain GET requests no matter what UA we send.
|
|
64
|
+
// Without this guard the model burns multiple turns retrying variations
|
|
65
|
+
// (Zillow → /research/austin-tx, /homedetails/X, /sold/Y...) that all
|
|
66
|
+
// 403 the same way, padding the step counter and the user's bill.
|
|
67
|
+
// Short-circuiting here returns a single actionable error instead.
|
|
68
|
+
const blocked = isBlockedDomain(parsed.hostname);
|
|
69
|
+
if (blocked) {
|
|
70
|
+
return {
|
|
71
|
+
output: `${parsed.hostname} systematically blocks automated fetch (${blocked.reason}). ` +
|
|
72
|
+
`Switch tools: ${blocked.alternative}. Don't retry this URL with WebFetch — ` +
|
|
73
|
+
`every variant of the same hostname returns the same block.`,
|
|
74
|
+
isError: true,
|
|
75
|
+
};
|
|
76
|
+
}
|
|
61
77
|
const maxLen = Math.min(max_length ?? DEFAULT_MAX_LENGTH, MAX_BODY_BYTES);
|
|
62
78
|
// ── YouTube special case ──
|
|
63
79
|
// Plain HTML fetch on a youtube.com URL returns the SPA bundle (a wall of
|
|
@@ -108,8 +124,19 @@ async function execute(input, ctx) {
|
|
|
108
124
|
redirect: 'follow',
|
|
109
125
|
});
|
|
110
126
|
if (!response.ok) {
|
|
127
|
+
// 403 / 429 / 503 from a domain not in the static block list often still
|
|
128
|
+
// means anti-bot — many sites tier their detection (first hit OK,
|
|
129
|
+
// subsequent ones blocked) or rely on UA fingerprinting. Surface
|
|
130
|
+
// this as an actionable hint so the model switches strategy
|
|
131
|
+
// instead of retrying the same URL with a different path.
|
|
132
|
+
const isAntiBot = response.status === 403 || response.status === 429 ||
|
|
133
|
+
response.status === 503;
|
|
134
|
+
const hint = isAntiBot
|
|
135
|
+
? ` — ${parsed.hostname} likely blocks automated fetch. Try WebSearch for the same query, ` +
|
|
136
|
+
`or fetch a different domain that publishes the same data.`
|
|
137
|
+
: '';
|
|
111
138
|
return {
|
|
112
|
-
output: `HTTP ${response.status} ${response.statusText} for ${url}`,
|
|
139
|
+
output: `HTTP ${response.status} ${response.statusText} for ${url}${hint}`,
|
|
113
140
|
isError: true,
|
|
114
141
|
};
|
|
115
142
|
}
|
|
@@ -176,6 +203,76 @@ async function execute(input, ctx) {
|
|
|
176
203
|
ctx.abortSignal.removeEventListener('abort', onAbort);
|
|
177
204
|
}
|
|
178
205
|
}
|
|
206
|
+
const BLOCKED_DOMAINS = [
|
|
207
|
+
{
|
|
208
|
+
pattern: /(^|\.)zillow\.com$/i,
|
|
209
|
+
reason: '403 to all non-browser GETs',
|
|
210
|
+
alternative: 'use WebSearch for "Austin TX home price trends" or similar',
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
pattern: /(^|\.)redfin\.com$/i,
|
|
214
|
+
reason: '403 / captcha challenge to scripted requests',
|
|
215
|
+
alternative: 'use WebSearch with the property address or zip code',
|
|
216
|
+
},
|
|
217
|
+
{
|
|
218
|
+
pattern: /(^|\.)realtor\.com$/i,
|
|
219
|
+
reason: '403 / interstitial to non-browser UAs',
|
|
220
|
+
alternative: 'use WebSearch',
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
pattern: /(^|\.)linkedin\.com$/i,
|
|
224
|
+
reason: 'auth wall on every page',
|
|
225
|
+
alternative: 'use SearchX (X is the better discovery surface for the same people) or WebSearch',
|
|
226
|
+
},
|
|
227
|
+
{
|
|
228
|
+
pattern: /(^|\.)instagram\.com$/i,
|
|
229
|
+
reason: 'auth wall + 401 to public profile fetches',
|
|
230
|
+
alternative: 'use WebSearch for the username',
|
|
231
|
+
},
|
|
232
|
+
{
|
|
233
|
+
pattern: /(^|\.)facebook\.com$/i,
|
|
234
|
+
reason: 'auth wall on most public content',
|
|
235
|
+
alternative: 'use WebSearch',
|
|
236
|
+
},
|
|
237
|
+
{
|
|
238
|
+
pattern: /(^|\.)x\.com$/i,
|
|
239
|
+
reason: 'X.com requires authenticated API',
|
|
240
|
+
alternative: 'use SearchX (the dedicated X tool) instead of WebFetch',
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
pattern: /(^|\.)twitter\.com$/i,
|
|
244
|
+
reason: 'X.com requires authenticated API',
|
|
245
|
+
alternative: 'use SearchX (the dedicated X tool) instead of WebFetch',
|
|
246
|
+
},
|
|
247
|
+
{
|
|
248
|
+
pattern: /(^|\.)tiktok\.com$/i,
|
|
249
|
+
reason: 'returns SPA shell + JS challenge',
|
|
250
|
+
alternative: 'use WebSearch with the @username',
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
pattern: /(^|\.)reuters\.com$/i,
|
|
254
|
+
reason: 'paywall + bot detection',
|
|
255
|
+
alternative: 'use WebSearch which surfaces cached headlines',
|
|
256
|
+
},
|
|
257
|
+
{
|
|
258
|
+
pattern: /(^|\.)bloomberg\.com$/i,
|
|
259
|
+
reason: 'paywall + bot detection',
|
|
260
|
+
alternative: 'use WebSearch for the same story',
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
pattern: /(^|\.)wsj\.com$/i,
|
|
264
|
+
reason: 'paywall',
|
|
265
|
+
alternative: 'use WebSearch for the same story',
|
|
266
|
+
},
|
|
267
|
+
];
|
|
268
|
+
function isBlockedDomain(hostname) {
|
|
269
|
+
for (const entry of BLOCKED_DOMAINS) {
|
|
270
|
+
if (entry.pattern.test(hostname)) {
|
|
271
|
+
return { reason: entry.reason, alternative: entry.alternative };
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
return null;
|
|
275
|
+
}
|
|
179
276
|
// ─── YouTube transcript fetcher ─────────────────────────────────────────────
|
|
180
277
|
// Fetches auto-generated or uploaded captions for a YouTube video by parsing
|
|
181
278
|
// the watch-page's `ytInitialPlayerResponse` JSON. Pure HTTP, no deps. Saves
|
package/package.json
CHANGED