@blockrun/franklin 3.9.4 → 3.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent/context.js
CHANGED
|
@@ -174,6 +174,7 @@ function getToolPatternsSection() {
|
|
|
174
174
|
- **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
|
|
175
175
|
- **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
|
|
176
176
|
- **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.
|
|
177
|
+
- **Long-running iteration (>20 items)**: Do NOT loop in the agent (one tool call per item burns turns and trips timeouts on the 21st item). Instead: Write a script (Node/Bash/Python), have it iterate with a checkpoint file (\`./.franklin/<task>.checkpoint.json\` storing cursor + processedCount), then Bash it once. The agent re-engages only on errors or completion. Pattern fits paginated APIs, batch enrichment, large CSV emit, anything where the loop body is deterministic. The agent's job is to design and orchestrate, not to be the for-loop.
|
|
177
178
|
|
|
178
179
|
# Grounding Before Answering
|
|
179
180
|
Your training data is frozen in the past. Live-world questions MUST be answered from tool results, not memory.
|
package/dist/agent/llm.js
CHANGED
|
@@ -7,15 +7,25 @@ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, creat
|
|
|
7
7
|
import { USER_AGENT } from '../config.js';
|
|
8
8
|
import { routeRequest, parseRoutingProfile } from '../router/index.js';
|
|
9
9
|
import { ThinkTagStripper } from './think-tag-stripper.js';
|
|
10
|
+
import { isNemotronProseModel, stripNemotronProse } from './nemotron-prose-stripper.js';
|
|
10
11
|
/**
 * Read a millisecond timeout from the environment.
 *
 * @param {string} name - Environment variable to read.
 * @returns {number|null} The base-10 integer parsed from the variable when it
 *   is finite and non-negative; `null` when the variable is unset, empty,
 *   unparsable, or negative.
 */
function parseTimeoutEnv(name) {
    const value = process.env[name];
    // Unset and empty-string values are both treated as "not configured".
    if (!value) {
        return null;
    }
    const ms = Number.parseInt(value, 10);
    // isFinite rejects both NaN (no leading digits) and Infinity (overlong digit runs).
    const usable = Number.isFinite(ms) && ms >= 0;
    return usable ? ms : null;
}
|
|
15
16
|
/**
 * Resolve the model request timeout in milliseconds.
 *
 * Precedence: FRANKLIN_MODEL_REQUEST_TIMEOUT_MS, then
 * FRANKLIN_MODEL_IDLE_TIMEOUT_MS, then the built-in 180s default.
 *
 * @returns {number} Timeout in milliseconds.
 */
function getModelRequestTimeoutMs() {
    // The budget covers *time-to-headers*: the gateway flushes SSE headers
    // only once the upstream model emits its first token. Reasoning-class
    // models (zai/glm-*, nemotron *-reasoning, deepseek-r*, gpt-5-codex,
    // anthropic extended-thinking) routinely take 60–120s to first token on
    // cache-cold prompts or when the gateway is under load — the old 45s
    // default cut those off and wasted USDC on retries that hit the same
    // wall. 180s is generous enough for any realistic first-token latency,
    // still bounded enough that genuinely dead requests surface within
    // ~6 min after the single timeout retry.
    const requestOverride = parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS');
    if (requestOverride != null) {
        return requestOverride;
    }
    // Only consulted when the request-specific variable yields nothing.
    const idleOverride = parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS');
    if (idleOverride != null) {
        return idleOverride;
    }
    return 180_000;
}
|
|
20
30
|
function getModelStreamIdleTimeoutMs() {
|
|
21
31
|
return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
|
|
@@ -420,6 +430,7 @@ export class ModelClient {
|
|
|
420
430
|
let currentToolName = '';
|
|
421
431
|
let currentToolInput = '';
|
|
422
432
|
const textEmission = { mode: 'undecided' };
|
|
433
|
+
const isNemotronProse = isNemotronProseModel(request.model);
|
|
423
434
|
// Split inline <think>…</think> emitted by reasoning models (nemotron,
|
|
424
435
|
// deepseek-r1, qwq, etc.) that use the text field instead of the native
|
|
425
436
|
// thinking block. Thinking emitted this way is display-only — we don't
|
|
@@ -439,7 +450,9 @@ export class ModelClient {
|
|
|
439
450
|
const trimmed = currentText.trimStart();
|
|
440
451
|
if (!trimmed)
|
|
441
452
|
return;
|
|
442
|
-
|
|
453
|
+
// Nemotron Omni leaks reasoning prose into the text channel without
|
|
454
|
+
// <think> tags. Hold the buffer for end-of-stream stripping.
|
|
455
|
+
textEmission.mode = isNemotronProse || trimmed.startsWith('{') ? 'hold' : 'stream';
|
|
443
456
|
if (textEmission.mode === 'stream') {
|
|
444
457
|
onStreamDelta?.({ type: 'text', text: currentText });
|
|
445
458
|
}
|
|
@@ -585,6 +598,13 @@ export class ModelClient {
|
|
|
585
598
|
'Treating it as non-productive output so recovery can try another model.');
|
|
586
599
|
}
|
|
587
600
|
}
|
|
601
|
+
else if (textEmission.mode === 'hold' && isNemotronProse) {
|
|
602
|
+
const { thinking, answer } = stripNemotronProse(currentText);
|
|
603
|
+
if (thinking)
|
|
604
|
+
onStreamDelta?.({ type: 'thinking', text: thinking });
|
|
605
|
+
onStreamDelta?.({ type: 'text', text: answer });
|
|
606
|
+
collected.push({ type: 'text', text: answer });
|
|
607
|
+
}
|
|
588
608
|
else {
|
|
589
609
|
if (textEmission.mode !== 'stream') {
|
|
590
610
|
onStreamDelta?.({ type: 'text', text: currentText });
|
|
@@ -646,6 +666,13 @@ export class ModelClient {
|
|
|
646
666
|
'Treating it as non-productive output so recovery can try another model.');
|
|
647
667
|
}
|
|
648
668
|
}
|
|
669
|
+
else if (textEmission.mode === 'hold' && isNemotronProse) {
|
|
670
|
+
const { thinking, answer } = stripNemotronProse(currentText);
|
|
671
|
+
if (thinking)
|
|
672
|
+
onStreamDelta?.({ type: 'thinking', text: thinking });
|
|
673
|
+
onStreamDelta?.({ type: 'text', text: answer });
|
|
674
|
+
collected.push({ type: 'text', text: answer });
|
|
675
|
+
}
|
|
649
676
|
else {
|
|
650
677
|
if (textEmission.mode !== 'stream') {
|
|
651
678
|
onStreamDelta?.({ type: 'text', text: currentText });
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Strip leaked reasoning prose from Nemotron-family models.
|
|
3
|
+
*
|
|
4
|
+
* NVIDIA's Nemotron Omni reasoning model emits its chain of thought as plain
|
|
5
|
+
* text — without `<think>` tags or a separate reasoning_content channel — so
|
|
6
|
+
* the think-tag stripper can't catch it. The reasoning prose is then concatenated
|
|
7
|
+
* directly with the answer (often without even a separator), e.g.:
|
|
8
|
+
*
|
|
9
|
+
* "The user asks: ... According to instructions, we must obey. Just output
|
|
10
|
+
* the tokenOMNI_E2E_OK"
|
|
11
|
+
*
|
|
12
|
+
* This module detects the reasoning preamble (heuristic: leading sentence
|
|
13
|
+
* matches a known meta-reasoning opener) and strips everything up to and
|
|
14
|
+
* including the last "answer-introducer" phrase ("just output the token",
|
|
15
|
+
* "the answer is:", "output:", etc.). The stripped portion is returned as
|
|
16
|
+
* `thinking` so it can be routed to the thinking display channel; the
|
|
17
|
+
* remainder is the user-facing `answer`.
|
|
18
|
+
*/
|
|
19
|
+
export declare function isNemotronProseModel(model: string): boolean;
|
|
20
|
+
export declare function stripNemotronProse(text: string): {
|
|
21
|
+
thinking: string;
|
|
22
|
+
answer: string;
|
|
23
|
+
};
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Strip leaked reasoning prose from Nemotron-family models.
|
|
3
|
+
*
|
|
4
|
+
* NVIDIA's Nemotron Omni reasoning model emits its chain of thought as plain
|
|
5
|
+
* text — without `<think>` tags or a separate reasoning_content channel — so
|
|
6
|
+
* the think-tag stripper can't catch it. The reasoning prose is then concatenated
|
|
7
|
+
* directly with the answer (often without even a separator), e.g.:
|
|
8
|
+
*
|
|
9
|
+
* "The user asks: ... According to instructions, we must obey. Just output
|
|
10
|
+
* the tokenOMNI_E2E_OK"
|
|
11
|
+
*
|
|
12
|
+
* This module detects the reasoning preamble (heuristic: leading sentence
|
|
13
|
+
* matches a known meta-reasoning opener) and strips everything up to and
|
|
14
|
+
* including the last "answer-introducer" phrase ("just output the token",
|
|
15
|
+
* "the answer is:", "output:", etc.). The stripped portion is returned as
|
|
16
|
+
* `thinking` so it can be routed to the thinking display channel; the
|
|
17
|
+
* remainder is the user-facing `answer`.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Patterns for a meta-reasoning opener. Each is anchored at the start of the
 * (left-trimmed) text; if none matches, the text is not treated as leaked
 * reasoning at all.
 */
const REASONING_OPENERS = [
    /^the user (asks|wants|says|requested|is asking|wants me|wrote|just|said)/i,
    /^looking at (this|the)/i,
    /^based on (the|this)/i,
    /^according to/i,
    /^we (must|should|need)/i,
    // The contraction attaches directly to the pronoun ("I'll"), so the space
    // before 'll is optional; every other form is a separate word after "I ".
    /^i(?: ?'ll| (?:need|should|must|will|am going to|have to))\s/i,
    /^let me/i,
    /^there'?s? no need/i,
    /^okay,?\s+(the user|so|let|i)/i,
    /^alright,?\s+(the user|so|let|i)/i,
    /^so,?\s+the user/i,
    /^the question (is|asks)/i,
    /^the prompt (is|says|asks)/i,
];
/**
 * Patterns for an "answer-introducer" phrase. The text after the LAST match
 * of any of these is taken as the answer. The `g` flag is required because
 * the patterns are consumed through `String.prototype.matchAll`.
 */
const ANSWER_INTRODUCERS = [
    /\bjust\s+(?:output|respond|say|reply|return|emit|write|give|print)\s+(?:the|a|with|out|to|exactly|back|only)?\s*(?:token|word|answer|response|string|text|output|message)?\s*:?\s*/gi,
    /\b(?:the|my)\s+(?:answer|response|token|output|reply)\s+is\s*:?\s*/gi,
    /\bhere'?s?\s+(?:the|my)?\s*(?:response|answer|output|token|reply):?\s*/gi,
    /(?:^|[\s.])(?:output|response|answer|reply|token)\s*:\s*/gi,
    /\bi(?:'ll| will| shall)\s+(?:output|respond|say|reply|return|emit|write|give|print)\s+(?:the|a|with|out|to|exactly|back|only)?\s*(?:token|word|answer|response|string|text|output|message)?\s*:?\s*/gi,
];
/**
 * Report whether a model id belongs to the family handled by this module.
 *
 * @param {string} model - Model identifier, e.g. "nvidia/nemotron-3-nano-omni…".
 * @returns {boolean} True when the id starts with "nvidia/nemotron-3-nano-omni"
 *   (case-insensitive).
 */
export function isNemotronProseModel(model) {
    return /^nvidia\/nemotron-3-nano-omni/i.test(model);
}
/**
 * Split leaked reasoning prose from the user-facing answer.
 *
 * The text is only split when it starts with a known reasoning opener AND
 * contains at least one answer-introducer phrase; everything up to and
 * including the last introducer becomes `thinking`, the remainder (with
 * leading whitespace/punctuation removed) becomes `answer`. In every other
 * case the input is returned untouched as `answer`.
 *
 * @param {string} text - Complete text emitted by the model.
 * @returns {{ thinking: string, answer: string }} `thinking` is '' whenever
 *   nothing was stripped.
 */
export function stripNemotronProse(text) {
    if (!text)
        return { thinking: '', answer: '' };
    const trimmed = text.trimStart();
    const leadingWhitespace = text.slice(0, text.length - trimmed.length);
    if (!trimmed)
        return { thinking: '', answer: text };
    // Reject early: if no reasoning opener at the start, this isn't leaked prose.
    if (!REASONING_OPENERS.some((opener) => opener.test(trimmed))) {
        return { thinking: '', answer: text };
    }
    // Find where the last answer-introducer (across all patterns) ends.
    let lastEnd = -1;
    for (const introducer of ANSWER_INTRODUCERS) {
        for (const match of trimmed.matchAll(introducer)) {
            lastEnd = Math.max(lastEnd, match.index + match[0].length);
        }
    }
    if (lastEnd === -1) {
        // Reasoning detected but no transition phrase found. Conservative: leave
        // the text intact rather than swallow what might be a legitimate answer.
        return { thinking: '', answer: text };
    }
    const thinking = leadingWhitespace + trimmed.slice(0, lastEnd);
    const answer = trimmed.slice(lastEnd).replace(/^[\s.,:;\-—]+/, '');
    // Don't return an empty answer — fall back to the original text so the user
    // gets *something* even if the heuristic over-stripped.
    if (!answer)
        return { thinking: '', answer: text };
    return { thinking, answer };
}
|
package/dist/proxy/server.js
CHANGED
|
@@ -41,7 +41,13 @@ function log(...args) {
|
|
|
41
41
|
catch { /* ignore */ }
|
|
42
42
|
}
|
|
43
43
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
44
|
-
|
|
44
|
+
// 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
|
|
45
|
+
// nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
|
|
46
|
+
// routinely take 60–120s to first token on cache-cold prompts or busy
|
|
47
|
+
// gateways. The old 45s default cut those off and the proxy returned a
|
|
48
|
+
// failed response that downstream agents (Cline, Claude Desktop, etc.) had
|
|
49
|
+
// to retry blindly.
|
|
50
|
+
const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 180_000;
|
|
45
51
|
const DEFAULT_PROXY_STREAM_TIMEOUT_MS = 5 * 60 * 1000;
|
|
46
52
|
function parseTimeoutEnv(name, fallback) {
|
|
47
53
|
const raw = process.env[name];
|
package/dist/tools/imagegen.js
CHANGED
|
@@ -118,7 +118,7 @@ function buildExecute(deps) {
|
|
|
118
118
|
};
|
|
119
119
|
}
|
|
120
120
|
let imageModel = model || (referenceImage ? 'openai/gpt-image-2' : 'openai/gpt-image-1');
|
|
121
|
-
|
|
121
|
+
let imageSize = size || '1024x1024';
|
|
122
122
|
let chosenPrompt = prompt;
|
|
123
123
|
// Skip the proposal flow when a reference image is set: the media router
|
|
124
124
|
// doesn't know which models support image-to-image, so its suggestions
|
|
@@ -171,6 +171,12 @@ function buildExecute(deps) {
|
|
|
171
171
|
// Router / AskUser failed — fall back to default model silently.
|
|
172
172
|
}
|
|
173
173
|
}
|
|
174
|
+
// gpt-image-2 reliably serves 1024x1024 only — other sizes time out at
|
|
175
|
+
// the gateway. Force the supported size regardless of caller / router
|
|
176
|
+
// input so we never burn USDC on a request that's going to abort.
|
|
177
|
+
if (imageModel === 'openai/gpt-image-2' && imageSize !== '1024x1024') {
|
|
178
|
+
imageSize = '1024x1024';
|
|
179
|
+
}
|
|
174
180
|
if (contentId && deps.library) {
|
|
175
181
|
const decision = checkImageBudget(deps.library, contentId, imageModel, imageSize);
|
|
176
182
|
if (!decision.ok) {
|
|
@@ -427,7 +433,7 @@ export function createImageGenCapability(deps = {}) {
|
|
|
427
433
|
properties: {
|
|
428
434
|
prompt: { type: 'string', description: 'Text description of the image to generate' },
|
|
429
435
|
output_path: { type: 'string', description: 'Where to save the image. Default: generated-<timestamp>.png in working directory' },
|
|
430
|
-
size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024' },
|
|
436
|
+
size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024. Note: openai/gpt-image-2 is forced to 1024x1024 (other sizes time out at the gateway).' },
|
|
431
437
|
model: { type: 'string', description: 'Image model to use. Default: openai/gpt-image-1' },
|
|
432
438
|
image_url: { type: 'string', description: 'Optional reference image (image-to-image / style transfer). Accepts an http(s) URL, a data URI, or a local file path. Only works with edit-capable models.' },
|
|
433
439
|
contentId: { type: 'string', description: 'Optional Content id to attach this generation to. Pre-flight budget check + auto-record on success.' },
|
package/package.json
CHANGED