@blockrun/franklin 3.8.36 → 3.8.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/evaluator.d.ts +3 -1
- package/dist/agent/evaluator.js +44 -8
- package/dist/agent/llm.js +2 -2
- package/dist/proxy/server.js +143 -23
- package/package.json +1 -1
|
@@ -31,7 +31,9 @@ export interface GroundingResult {
|
|
|
31
31
|
}
|
|
32
32
|
/**
|
|
33
33
|
* Decide whether this turn warrants a grounding check. Principles:
|
|
34
|
-
* - Non-trivial user input (not a greeting, not a slash command)
|
|
34
|
+
* - Non-trivial user input (not a greeting, not a slash command), OR
|
|
35
|
+
* the assistant answer contains specific factual claims (numbers + units,
|
|
36
|
+
* currency, dates, times) regardless of input length
|
|
35
37
|
* - Non-trivial assistant text output (not just a tool-result echo)
|
|
36
38
|
*
|
|
37
39
|
* Intentionally NOT gating on tool-type (read vs write) — the whole point
|
package/dist/agent/evaluator.js
CHANGED
|
@@ -71,11 +71,19 @@ If not GROUNDED, list each issue on its own line starting with "- " and the tool
|
|
|
71
71
|
|
|
72
72
|
Empty line between verdict and list. No other text. No preamble. No apology. Be terse.`;
|
|
73
73
|
// ─── Trigger policy ──────────────────────────────────────────────────────
|
|
74
|
-
const MIN_USER_CHARS =
|
|
74
|
+
const MIN_USER_CHARS = 3; // "hi"/"ok"/"no" skip; "BTC"/"21044" do not
|
|
75
75
|
const MIN_ANSWER_CHARS = 50; // Short answers are acks, not factual claims
|
|
76
|
+
// Factual-content patterns: digits paired with units, currency, dates, or
|
|
77
|
+
// percent/temperature/time signs. If the assistant emitted any of these in
|
|
78
|
+
// a >= MIN_ANSWER_CHARS reply, we check grounding regardless of how short
|
|
79
|
+
// the user's input was — a 5-char ZIP code "21044" can elicit a fabricated
|
|
80
|
+
// weather paragraph, and the original user-length gate let that through.
|
|
81
|
+
const FACTUAL_PATTERN = /(\$\s*\d|\d[\d,]*\s*(?:°[CF]?|%|km|mi|miles?|mph|kph|kg|lbs?|ft|in|cm|hours?|hrs?|minutes?|mins?|seconds?|secs?|GB|MB|KB|TB|USD|EUR|CNY|JPY|BTC|ETH|SOL)|\b(?:19|20)\d{2}-\d{1,2}-\d{1,2}\b|\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b|\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}\b)/;
|
|
76
82
|
/**
|
|
77
83
|
* Decide whether this turn warrants a grounding check. Principles:
|
|
78
|
-
* - Non-trivial user input (not a greeting, not a slash command)
|
|
84
|
+
* - Non-trivial user input (not a greeting, not a slash command), OR
|
|
85
|
+
* the assistant answer contains specific factual claims (numbers + units,
|
|
86
|
+
* currency, dates, times) regardless of input length
|
|
79
87
|
* - Non-trivial assistant text output (not just a tool-result echo)
|
|
80
88
|
*
|
|
81
89
|
* Intentionally NOT gating on tool-type (read vs write) — the whole point
|
|
@@ -85,11 +93,17 @@ export function shouldCheckGrounding(userInput, assistantText) {
|
|
|
85
93
|
if (process.env.FRANKLIN_NO_EVAL === '1')
|
|
86
94
|
return false;
|
|
87
95
|
const ui = userInput.trim();
|
|
88
|
-
if (ui.length < MIN_USER_CHARS)
|
|
89
|
-
return false;
|
|
90
96
|
if (ui.startsWith('/'))
|
|
91
97
|
return false;
|
|
92
|
-
|
|
98
|
+
const at = assistantText.trim();
|
|
99
|
+
if (at.length < MIN_ANSWER_CHARS)
|
|
100
|
+
return false;
|
|
101
|
+
// If the answer looks factual (numbers + units, dates, prices), check
|
|
102
|
+
// even when the user's prompt was a single token. The 21044 zip-code
|
|
103
|
+
// case lived here.
|
|
104
|
+
if (FACTUAL_PATTERN.test(at))
|
|
105
|
+
return true;
|
|
106
|
+
if (ui.length < MIN_USER_CHARS)
|
|
93
107
|
return false;
|
|
94
108
|
return true;
|
|
95
109
|
}
|
|
@@ -294,15 +308,37 @@ export function renderGroundingFollowup(result) {
|
|
|
294
308
|
* history; we only need to name the gap + the tools to use.
|
|
295
309
|
*/
|
|
296
310
|
export function buildGroundingRetryInstruction(result, originalUserQuestion) {
|
|
311
|
+
// Pull the named missing tools out of the evaluator's issue list so we
|
|
312
|
+
// can name them in the imperative. The evaluator outputs lines like
|
|
313
|
+
// Claim: "..." → missing tool: WebSearch
|
|
314
|
+
// grab the bit after "missing tool:" / "should have called:".
|
|
315
|
+
const namedTools = new Set();
|
|
316
|
+
for (const issue of result.issues) {
|
|
317
|
+
const m = issue.match(/(?:missing tool|should have called):\s*([A-Za-z][\w| ,/-]*)/i);
|
|
318
|
+
if (m) {
|
|
319
|
+
for (const tok of m[1].split(/[|,/]/)) {
|
|
320
|
+
const t = tok.trim().split(/\s+/)[0];
|
|
321
|
+
if (t && t !== '...' && t !== '(or')
|
|
322
|
+
namedTools.add(t);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
const toolList = namedTools.size > 0
|
|
327
|
+
? Array.from(namedTools).join(', ')
|
|
328
|
+
: '(see the missing-tool fields in the issues above)';
|
|
297
329
|
const lines = [
|
|
298
|
-
'[GROUNDING CHECK FAILED]',
|
|
299
|
-
'Your previous answer stated facts without calling
|
|
330
|
+
'[GROUNDING CHECK FAILED — RETRY ROUND]',
|
|
331
|
+
'Your previous answer stated facts without calling tools. Specifically:',
|
|
300
332
|
];
|
|
301
333
|
for (const issue of result.issues) {
|
|
302
334
|
lines.push(`- ${issue}`);
|
|
303
335
|
}
|
|
304
336
|
lines.push('');
|
|
305
|
-
lines.push('
|
|
337
|
+
lines.push('## What you must do this round');
|
|
338
|
+
lines.push(`1. **Call these tools first**, before any prose: ${toolList}.`);
|
|
339
|
+
lines.push('2. **Do not write a single factual sentence until the tool results return.** No restatement of the prior answer, no hedging, no "based on general knowledge".');
|
|
340
|
+
lines.push('3. **Do NOT invent source names** (no fake URLs, no fabricated citation domains, no "per Trippy" / "per drivvin.com" — if you cite a source, it must come from a tool result you just ran).');
|
|
341
|
+
lines.push('4. After tools return, write a concise answer that ONLY restates what the tool outputs say. If a result is partial or a tool failed, say so explicitly — do not paper over with memory.');
|
|
306
342
|
lines.push('');
|
|
307
343
|
lines.push(`Original user question: ${originalUserQuestion.trim().slice(0, 500)}`);
|
|
308
344
|
return lines.join('\n');
|
package/dist/agent/llm.js
CHANGED
|
@@ -15,12 +15,12 @@ function parseTimeoutEnv(name) {
|
|
|
15
15
|
function getModelRequestTimeoutMs() {
|
|
16
16
|
return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
|
|
17
17
|
parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
|
|
18
|
-
|
|
18
|
+
45_000);
|
|
19
19
|
}
|
|
20
20
|
function getModelStreamIdleTimeoutMs() {
|
|
21
21
|
return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
|
|
22
22
|
parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
|
|
23
|
-
|
|
23
|
+
90_000);
|
|
24
24
|
}
|
|
25
25
|
function linkAbortSignal(parent, child) {
|
|
26
26
|
if (!parent)
|
package/dist/proxy/server.js
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
|
|
6
6
|
import { recordUsage } from '../stats/tracker.js';
|
|
7
7
|
import { appendAudit } from '../stats/audit.js';
|
|
8
|
-
import {
|
|
8
|
+
import { buildFallbackChain, DEFAULT_FALLBACK_CONFIG, ROUTING_PROFILES, } from './fallback.js';
|
|
9
9
|
import { routeRequest, parseRoutingProfile, } from '../router/index.js';
|
|
10
10
|
import { estimateCost } from '../pricing.js';
|
|
11
11
|
import { VERSION } from '../config.js';
|
|
@@ -41,6 +41,57 @@ function log(...args) {
|
|
|
41
41
|
catch { /* ignore */ }
|
|
42
42
|
}
|
|
43
43
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
44
|
+
const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 45_000;
|
|
45
|
+
const DEFAULT_PROXY_STREAM_TIMEOUT_MS = 5 * 60 * 1000;
|
|
46
|
+
function parseTimeoutEnv(name, fallback) {
|
|
47
|
+
const raw = process.env[name];
|
|
48
|
+
if (!raw)
|
|
49
|
+
return fallback;
|
|
50
|
+
const parsed = Number.parseInt(raw, 10);
|
|
51
|
+
return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
|
|
52
|
+
}
|
|
53
|
+
function getProxyRequestTimeoutMs() {
|
|
54
|
+
return parseTimeoutEnv('FRANKLIN_PROXY_REQUEST_TIMEOUT_MS', DEFAULT_PROXY_REQUEST_TIMEOUT_MS);
|
|
55
|
+
}
|
|
56
|
+
function getProxyStreamTimeoutMs() {
|
|
57
|
+
return parseTimeoutEnv('FRANKLIN_PROXY_STREAM_TIMEOUT_MS', DEFAULT_PROXY_STREAM_TIMEOUT_MS);
|
|
58
|
+
}
|
|
59
|
+
function createProxyTimeoutError(label, timeoutMs) {
|
|
60
|
+
return new Error(`${label} timed out after ${timeoutMs}ms`);
|
|
61
|
+
}
|
|
62
|
+
async function fetchWithTimeout(url, init, timeoutMs, label) {
|
|
63
|
+
if (timeoutMs <= 0)
|
|
64
|
+
return fetch(url, init);
|
|
65
|
+
const controller = new AbortController();
|
|
66
|
+
const timeoutError = createProxyTimeoutError(label, timeoutMs);
|
|
67
|
+
const timeout = setTimeout(() => {
|
|
68
|
+
try {
|
|
69
|
+
controller.abort(timeoutError);
|
|
70
|
+
}
|
|
71
|
+
catch { /* ignore */ }
|
|
72
|
+
}, timeoutMs);
|
|
73
|
+
try {
|
|
74
|
+
return await fetch(url, { ...init, signal: controller.signal });
|
|
75
|
+
}
|
|
76
|
+
catch (err) {
|
|
77
|
+
if (controller.signal.aborted)
|
|
78
|
+
throw timeoutError;
|
|
79
|
+
throw err;
|
|
80
|
+
}
|
|
81
|
+
finally {
|
|
82
|
+
clearTimeout(timeout);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
function replaceModelInBody(body, model) {
|
|
86
|
+
try {
|
|
87
|
+
const parsed = JSON.parse(body);
|
|
88
|
+
parsed.model = model;
|
|
89
|
+
return JSON.stringify(parsed);
|
|
90
|
+
}
|
|
91
|
+
catch {
|
|
92
|
+
return body;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
44
95
|
// Per-model last output tokens for adaptive max_tokens (avoids cross-request pollution)
|
|
45
96
|
const MAX_TRACKED_MODELS = 50;
|
|
46
97
|
const lastOutputByModel = new Map();
|
|
@@ -369,13 +420,21 @@ export function createProxy(options) {
|
|
|
369
420
|
};
|
|
370
421
|
let response;
|
|
371
422
|
let finalModel = requestModel;
|
|
423
|
+
const requestTimeoutMs = getProxyRequestTimeoutMs();
|
|
372
424
|
// Use fallback chain if enabled
|
|
373
425
|
if (fallbackEnabled && body && requestPath.includes('messages')) {
|
|
374
426
|
const fallbackConfig = {
|
|
375
427
|
...DEFAULT_FALLBACK_CONFIG,
|
|
376
428
|
chain: buildFallbackChain(requestModel),
|
|
377
429
|
};
|
|
378
|
-
const result = await
|
|
430
|
+
const result = await fetchWithPaymentFallback(targetUrl, requestInit, body, fallbackConfig, {
|
|
431
|
+
method: req.method || 'POST',
|
|
432
|
+
headers,
|
|
433
|
+
chain,
|
|
434
|
+
baseWallet,
|
|
435
|
+
solanaWallet,
|
|
436
|
+
timeoutMs: requestTimeoutMs,
|
|
437
|
+
}, (failedModel, status, nextModel) => {
|
|
379
438
|
log(`⚠️ ${failedModel} returned ${status}, falling back to ${nextModel}`);
|
|
380
439
|
});
|
|
381
440
|
response = result.response;
|
|
@@ -388,20 +447,14 @@ export function createProxy(options) {
|
|
|
388
447
|
}
|
|
389
448
|
}
|
|
390
449
|
else {
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
if (chain === 'solana' && solanaWallet) {
|
|
400
|
-
response = await handleSolanaPayment(response, targetUrl, req.method || 'POST', headers, body, solanaWallet.privateKey, solanaWallet.address);
|
|
401
|
-
}
|
|
402
|
-
else if (baseWallet) {
|
|
403
|
-
response = await handleBasePayment(response, targetUrl, req.method || 'POST', headers, body, baseWallet.privateKey, baseWallet.address);
|
|
404
|
-
}
|
|
450
|
+
response = await fetchModelAttempt(targetUrl, requestInit, body, requestModel, {
|
|
451
|
+
method: req.method || 'POST',
|
|
452
|
+
headers,
|
|
453
|
+
chain,
|
|
454
|
+
baseWallet,
|
|
455
|
+
solanaWallet,
|
|
456
|
+
timeoutMs: requestTimeoutMs,
|
|
457
|
+
});
|
|
405
458
|
}
|
|
406
459
|
const responseHeaders = {};
|
|
407
460
|
response.headers.forEach((v, k) => {
|
|
@@ -452,7 +505,7 @@ export function createProxy(options) {
|
|
|
452
505
|
const decoder = new TextDecoder();
|
|
453
506
|
let fullResponse = '';
|
|
454
507
|
const STREAM_CAP = 5_000_000; // 5MB cap on accumulated stream
|
|
455
|
-
const STREAM_TIMEOUT_MS =
|
|
508
|
+
const STREAM_TIMEOUT_MS = getProxyStreamTimeoutMs();
|
|
456
509
|
const streamDeadline = Date.now() + STREAM_TIMEOUT_MS;
|
|
457
510
|
const pump = async () => {
|
|
458
511
|
while (true) {
|
|
@@ -563,10 +616,77 @@ export function createProxy(options) {
|
|
|
563
616
|
});
|
|
564
617
|
return server;
|
|
565
618
|
}
|
|
619
|
+
async function fetchModelAttempt(url, init, body, model, payment) {
|
|
620
|
+
let response = await fetchWithTimeout(url, { ...init, body: body || undefined }, payment.timeoutMs, `Proxy request for ${model}`);
|
|
621
|
+
if (response.status !== 402)
|
|
622
|
+
return response;
|
|
623
|
+
if (payment.chain === 'solana' && payment.solanaWallet) {
|
|
624
|
+
return handleSolanaPayment(response, url, payment.method, payment.headers, body, payment.solanaWallet.privateKey, payment.solanaWallet.address, payment.timeoutMs, model);
|
|
625
|
+
}
|
|
626
|
+
if (payment.baseWallet) {
|
|
627
|
+
return handleBasePayment(response, url, payment.method, payment.headers, body, payment.baseWallet.privateKey, payment.baseWallet.address, payment.timeoutMs, model);
|
|
628
|
+
}
|
|
629
|
+
return response;
|
|
630
|
+
}
|
|
631
|
+
/**
|
|
632
|
+
* Try each fallback model as a full x402 attempt:
|
|
633
|
+
* unpaid 402 probe, payment signing, then the paid provider call. The older
|
|
634
|
+
* flow only applied fallback to the probe, which meant a slow paid call could
|
|
635
|
+
* hang Franklin until the outer client gave up.
|
|
636
|
+
*/
|
|
637
|
+
async function fetchWithPaymentFallback(url, init, originalBody, config, payment, onFallback) {
|
|
638
|
+
const failedModels = [];
|
|
639
|
+
let attempts = 0;
|
|
640
|
+
for (let i = 0; i < config.chain.length && attempts < config.maxRetries; i++) {
|
|
641
|
+
const model = config.chain[i];
|
|
642
|
+
const body = replaceModelInBody(originalBody, model);
|
|
643
|
+
try {
|
|
644
|
+
attempts++;
|
|
645
|
+
const response = await fetchModelAttempt(url, init, body, model, payment);
|
|
646
|
+
if (!config.retryOn.includes(response.status)) {
|
|
647
|
+
return {
|
|
648
|
+
response,
|
|
649
|
+
modelUsed: model,
|
|
650
|
+
bodyUsed: body,
|
|
651
|
+
fallbackUsed: i > 0,
|
|
652
|
+
attemptsCount: attempts,
|
|
653
|
+
failedModels,
|
|
654
|
+
};
|
|
655
|
+
}
|
|
656
|
+
try {
|
|
657
|
+
await response.body?.cancel();
|
|
658
|
+
}
|
|
659
|
+
catch { /* ignore */ }
|
|
660
|
+
failedModels.push(model);
|
|
661
|
+
const nextModel = config.chain[i + 1];
|
|
662
|
+
if (nextModel && onFallback) {
|
|
663
|
+
onFallback(model, response.status, nextModel);
|
|
664
|
+
}
|
|
665
|
+
if (i < config.chain.length - 1) {
|
|
666
|
+
await sleep(config.retryDelayMs);
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
catch (err) {
|
|
670
|
+
failedModels.push(model);
|
|
671
|
+
const nextModel = config.chain[i + 1];
|
|
672
|
+
if (nextModel && onFallback) {
|
|
673
|
+
onFallback(model, 0, nextModel);
|
|
674
|
+
}
|
|
675
|
+
log(`[fallback] ${model} request error: ${err instanceof Error ? err.message : String(err)}`);
|
|
676
|
+
if (i < config.chain.length - 1) {
|
|
677
|
+
await sleep(config.retryDelayMs);
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
throw new Error(`All models in fallback chain failed: ${failedModels.join(', ')}`);
|
|
682
|
+
}
|
|
683
|
+
function sleep(ms) {
|
|
684
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
685
|
+
}
|
|
566
686
|
// ======================================================================
|
|
567
687
|
// Base (EIP-712) payment handler
|
|
568
688
|
// ======================================================================
|
|
569
|
-
async function handleBasePayment(response, url, method, headers, body, privateKey, fromAddress) {
|
|
689
|
+
async function handleBasePayment(response, url, method, headers, body, privateKey, fromAddress, timeoutMs = getProxyRequestTimeoutMs(), model = 'unknown') {
|
|
570
690
|
const paymentHeader = await extractPaymentHeader(response);
|
|
571
691
|
if (!paymentHeader) {
|
|
572
692
|
throw new Error('402 Payment Required — wallet may need funding. Run: franklin balance');
|
|
@@ -579,19 +699,19 @@ async function handleBasePayment(response, url, method, headers, body, privateKe
|
|
|
579
699
|
maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
|
|
580
700
|
extra: details.extra,
|
|
581
701
|
});
|
|
582
|
-
return
|
|
702
|
+
return fetchWithTimeout(url, {
|
|
583
703
|
method,
|
|
584
704
|
headers: {
|
|
585
705
|
...headers,
|
|
586
706
|
'PAYMENT-SIGNATURE': paymentPayload,
|
|
587
707
|
},
|
|
588
708
|
body: body || undefined,
|
|
589
|
-
});
|
|
709
|
+
}, timeoutMs, `Paid proxy request for ${model}`);
|
|
590
710
|
}
|
|
591
711
|
// ======================================================================
|
|
592
712
|
// Solana payment handler
|
|
593
713
|
// ======================================================================
|
|
594
|
-
async function handleSolanaPayment(response, url, method, headers, body, privateKey, fromAddress) {
|
|
714
|
+
async function handleSolanaPayment(response, url, method, headers, body, privateKey, fromAddress, timeoutMs = getProxyRequestTimeoutMs(), model = 'unknown') {
|
|
595
715
|
const paymentHeader = await extractPaymentHeader(response);
|
|
596
716
|
if (!paymentHeader) {
|
|
597
717
|
throw new Error('402 Payment Required — wallet may need funding. Run: franklin balance');
|
|
@@ -606,14 +726,14 @@ async function handleSolanaPayment(response, url, method, headers, body, private
|
|
|
606
726
|
maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
|
|
607
727
|
extra: details.extra,
|
|
608
728
|
});
|
|
609
|
-
return
|
|
729
|
+
return fetchWithTimeout(url, {
|
|
610
730
|
method,
|
|
611
731
|
headers: {
|
|
612
732
|
...headers,
|
|
613
733
|
'PAYMENT-SIGNATURE': paymentPayload,
|
|
614
734
|
},
|
|
615
735
|
body: body || undefined,
|
|
616
|
-
});
|
|
736
|
+
}, timeoutMs, `Paid proxy request for ${model}`);
|
|
617
737
|
}
|
|
618
738
|
export function classifyRequest(body) {
|
|
619
739
|
try {
|
package/package.json
CHANGED