@blockrun/franklin 3.9.4 → 3.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent/llm.js CHANGED
@@ -7,6 +7,7 @@ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, creat
7
7
  import { USER_AGENT } from '../config.js';
8
8
  import { routeRequest, parseRoutingProfile } from '../router/index.js';
9
9
  import { ThinkTagStripper } from './think-tag-stripper.js';
10
+ import { isNemotronProseModel, stripNemotronProse } from './nemotron-prose-stripper.js';
10
11
  function parseTimeoutEnv(name) {
11
12
  const raw = process.env[name];
12
13
  const parsed = raw ? Number.parseInt(raw, 10) : NaN;
@@ -420,6 +421,7 @@ export class ModelClient {
420
421
  let currentToolName = '';
421
422
  let currentToolInput = '';
422
423
  const textEmission = { mode: 'undecided' };
424
+ const isNemotronProse = isNemotronProseModel(request.model);
423
425
  // Split inline <think>…</think> emitted by reasoning models (nemotron,
424
426
  // deepseek-r1, qwq, etc.) that use the text field instead of the native
425
427
  // thinking block. Thinking emitted this way is display-only — we don't
@@ -439,7 +441,9 @@ export class ModelClient {
439
441
  const trimmed = currentText.trimStart();
440
442
  if (!trimmed)
441
443
  return;
442
- textEmission.mode = trimmed.startsWith('{') ? 'hold' : 'stream';
444
+ // Nemotron Omni leaks reasoning prose into the text channel without
445
+ // <think> tags. Hold the buffer for end-of-stream stripping.
446
+ textEmission.mode = isNemotronProse || trimmed.startsWith('{') ? 'hold' : 'stream';
443
447
  if (textEmission.mode === 'stream') {
444
448
  onStreamDelta?.({ type: 'text', text: currentText });
445
449
  }
@@ -585,6 +589,13 @@ export class ModelClient {
585
589
  'Treating it as non-productive output so recovery can try another model.');
586
590
  }
587
591
  }
592
+ else if (textEmission.mode === 'hold' && isNemotronProse) {
593
+ const { thinking, answer } = stripNemotronProse(currentText);
594
+ if (thinking)
595
+ onStreamDelta?.({ type: 'thinking', text: thinking });
596
+ onStreamDelta?.({ type: 'text', text: answer });
597
+ collected.push({ type: 'text', text: answer });
598
+ }
588
599
  else {
589
600
  if (textEmission.mode !== 'stream') {
590
601
  onStreamDelta?.({ type: 'text', text: currentText });
@@ -646,6 +657,13 @@ export class ModelClient {
646
657
  'Treating it as non-productive output so recovery can try another model.');
647
658
  }
648
659
  }
660
+ else if (textEmission.mode === 'hold' && isNemotronProse) {
661
+ const { thinking, answer } = stripNemotronProse(currentText);
662
+ if (thinking)
663
+ onStreamDelta?.({ type: 'thinking', text: thinking });
664
+ onStreamDelta?.({ type: 'text', text: answer });
665
+ collected.push({ type: 'text', text: answer });
666
+ }
649
667
  else {
650
668
  if (textEmission.mode !== 'stream') {
651
669
  onStreamDelta?.({ type: 'text', text: currentText });
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Strip leaked reasoning prose from Nemotron-family models.
3
+ *
4
+ * NVIDIA's Nemotron Omni reasoning model emits its chain of thought as plain
5
+ * text — without `<think>` tags or a separate reasoning_content channel — so
6
+ * the think-tag stripper can't catch it. The reasoning prose is then concatenated
7
+ * directly with the answer (often without even a separator), e.g.:
8
+ *
9
+ * "The user asks: ... According to instructions, we must obey. Just output
10
+ * the tokenOMNI_E2E_OK"
11
+ *
12
+ * This module detects the reasoning preamble (heuristic: leading sentence
13
+ * matches a known meta-reasoning opener) and strips everything up to and
14
+ * including the last "answer-introducer" phrase ("just output the token",
15
+ * "the answer is:", "output:", etc.). The stripped portion is returned as
16
+ * `thinking` so it can be routed to the thinking display channel; the
17
+ * remainder is the user-facing `answer`.
18
+ */
19
+ export declare function isNemotronProseModel(model: string): boolean;
20
+ export declare function stripNemotronProse(text: string): {
21
+ thinking: string;
22
+ answer: string;
23
+ };
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Strip leaked reasoning prose from Nemotron-family models.
3
+ *
4
+ * NVIDIA's Nemotron Omni reasoning model emits its chain of thought as plain
5
+ * text — without `<think>` tags or a separate reasoning_content channel — so
6
+ * the think-tag stripper can't catch it. The reasoning prose is then concatenated
7
+ * directly with the answer (often without even a separator), e.g.:
8
+ *
9
+ * "The user asks: ... According to instructions, we must obey. Just output
10
+ * the tokenOMNI_E2E_OK"
11
+ *
12
+ * This module detects the reasoning preamble (heuristic: leading sentence
13
+ * matches a known meta-reasoning opener) and strips everything up to and
14
+ * including the last "answer-introducer" phrase ("just output the token",
15
+ * "the answer is:", "output:", etc.). The stripped portion is returned as
16
+ * `thinking` so it can be routed to the thinking display channel; the
17
+ * remainder is the user-facing `answer`.
18
+ */
/**
 * Patterns that mark the start of leaked meta-reasoning. Each one is anchored
 * at the beginning of the (left-trimmed) text; if none matches, the text is
 * treated as a normal answer and passed through untouched.
 */
const REASONING_OPENERS = [
    /^the user (asks|wants|says|requested|is asking|wants me|wrote|just|said)/i,
    /^looking at (this|the)/i,
    /^based on (the|this)/i,
    /^according to/i,
    /^we (must|should|need)/i,
    // Accept both "I will ..." style phrases and the contraction "I'll ..."
    // (the contraction has no space between "I" and "'ll").
    /^i(?: (?:need|should|must|will|am going to|have to)| ?'ll)\s/i,
    /^let me/i,
    /^there'?s? no need/i,
    /^okay,?\s+(the user|so|let|i)/i,
    /^alright,?\s+(the user|so|let|i)/i,
    /^so,?\s+the user/i,
    /^the question (is|asks)/i,
    /^the prompt (is|says|asks)/i,
];
/**
 * Phrases that introduce the real answer. The end of the LAST match across
 * all patterns is used as the split point between reasoning and answer.
 *
 * The optional article/filler word ("the", "a", "to", ...) must end on a word
 * boundary so it cannot eat the first letters of the answer itself
 * ("just say apple" must not match "just say a"). The optional noun
 * ("token", "answer", ...) deliberately has no boundary because the model
 * often glues the answer directly onto it ("the tokenOMNI_E2E_OK").
 */
const ANSWER_INTRODUCERS = [
    /\bjust\s+(?:output|respond|say|reply|return|emit|write|give|print)\s+(?:(?:the|a|with|out|to|exactly|back|only)\b)?\s*(?:token|word|answer|response|string|text|output|message)?\s*:?\s*/gi,
    /\b(?:the|my)\s+(?:answer|response|token|output|reply)\s+is\s*:?\s*/gi,
    /\bhere'?s?\s+(?:the|my)?\s*(?:response|answer|output|token|reply):?\s*/gi,
    /(?:^|[\s.])(?:output|response|answer|reply|token)\s*:\s*/gi,
    /\bi(?:'ll| will| shall)\s+(?:output|respond|say|reply|return|emit|write|give|print)\s+(?:(?:the|a|with|out|to|exactly|back|only)\b)?\s*(?:token|word|answer|response|string|text|output|message)?\s*:?\s*/gi,
];
/**
 * Separator characters left dangling between the introducer and the answer.
 * A '.' or '-' that is immediately followed by a digit is kept, because it is
 * part of a number ("-5", ".5") rather than punctuation.
 */
const LEADING_SEPARATORS = /^(?:[\s,:;—]|[.\-](?!\d))+/;
/**
 * Report whether `model` is a model id whose text output should be run
 * through {@link stripNemotronProse}.
 *
 * @param model Model identifier, e.g. "nvidia/nemotron-3-nano-omni-...".
 * @returns true when the id starts with "nvidia/nemotron-3-nano-omni"
 *          (case-insensitive).
 */
export function isNemotronProseModel(model) {
    return /^nvidia\/nemotron-3-nano-omni/i.test(model);
}
/**
 * Split leaked reasoning prose from the user-facing answer.
 *
 * The text is only touched when it starts with a known reasoning opener AND
 * contains at least one answer-introducer phrase; everything up to the end of
 * the last introducer becomes `thinking`, the rest (minus dangling
 * separators) becomes `answer`. In every other case — empty input, no opener,
 * no introducer, or nothing left after the split — the original text is
 * returned as `answer` with an empty `thinking`, so output is never lost.
 *
 * @param text Full text emitted by the model.
 * @returns `{ thinking, answer }`; `thinking` is '' when nothing was stripped.
 */
export function stripNemotronProse(text) {
    if (!text)
        return { thinking: '', answer: '' };
    const untouched = { thinking: '', answer: text };
    const leadingWhitespaceMatch = text.match(/^\s*/);
    const leadingWhitespace = leadingWhitespaceMatch ? leadingWhitespaceMatch[0] : '';
    const trimmed = text.slice(leadingWhitespace.length);
    if (!trimmed)
        return untouched;
    // Reject early: if no reasoning opener at the start, this isn't leaked prose.
    if (!REASONING_OPENERS.some((p) => p.test(trimmed)))
        return untouched;
    // Find the furthest end position of any introducer match.
    let lastEnd = -1;
    for (const re of ANSWER_INTRODUCERS) {
        for (const m of trimmed.matchAll(re)) {
            const end = (m.index ?? 0) + m[0].length;
            if (end > lastEnd)
                lastEnd = end;
        }
    }
    if (lastEnd === -1) {
        // Reasoning detected but no transition phrase found. Conservative: leave
        // the text intact rather than swallow what might be a legitimate answer.
        return untouched;
    }
    const thinking = leadingWhitespace + trimmed.slice(0, lastEnd);
    const answer = trimmed.slice(lastEnd).replace(LEADING_SEPARATORS, '');
    // Don't return an empty answer — fall back to the original text so the user
    // gets *something* even if our heuristic over-stripped.
    if (!answer)
        return untouched;
    return { thinking, answer };
}
@@ -118,7 +118,7 @@ function buildExecute(deps) {
118
118
  };
119
119
  }
120
120
  let imageModel = model || (referenceImage ? 'openai/gpt-image-2' : 'openai/gpt-image-1');
121
- const imageSize = size || '1024x1024';
121
+ let imageSize = size || '1024x1024';
122
122
  let chosenPrompt = prompt;
123
123
  // Skip the proposal flow when a reference image is set: the media router
124
124
  // doesn't know which models support image-to-image, so its suggestions
@@ -171,6 +171,12 @@ function buildExecute(deps) {
171
171
  // Router / AskUser failed — fall back to default model silently.
172
172
  }
173
173
  }
174
+ // gpt-image-2 reliably serves 1024x1024 only — other sizes time out at
175
+ // the gateway. Force the supported size regardless of caller / router
176
+ // input so we never burn USDC on a request that's going to abort.
177
+ if (imageModel === 'openai/gpt-image-2' && imageSize !== '1024x1024') {
178
+ imageSize = '1024x1024';
179
+ }
174
180
  if (contentId && deps.library) {
175
181
  const decision = checkImageBudget(deps.library, contentId, imageModel, imageSize);
176
182
  if (!decision.ok) {
@@ -427,7 +433,7 @@ export function createImageGenCapability(deps = {}) {
427
433
  properties: {
428
434
  prompt: { type: 'string', description: 'Text description of the image to generate' },
429
435
  output_path: { type: 'string', description: 'Where to save the image. Default: generated-<timestamp>.png in working directory' },
430
- size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024' },
436
+ size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024. Note: openai/gpt-image-2 is forced to 1024x1024 (other sizes time out at the gateway).' },
431
437
  model: { type: 'string', description: 'Image model to use. Default: openai/gpt-image-1' },
432
438
  image_url: { type: 'string', description: 'Optional reference image (image-to-image / style transfer). Accepts an http(s) URL, a data URI, or a local file path. Only works with edit-capable models.' },
433
439
  contentId: { type: 'string', description: 'Optional Content id to attach this generation to. Pre-flight budget check + auto-record on success.' },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.9.4",
3
+ "version": "3.9.5",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {