npm - @blockrun/franklin - Versions diffs - 3.9.4 → 3.9.5 - Mend

@blockrun/franklin 3.9.4 → 3.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/agent/llm.js +19 -1
package/dist/agent/nemotron-prose-stripper.d.ts +23 -0
package/dist/agent/nemotron-prose-stripper.js +77 -0
package/dist/tools/imagegen.js +8 -2
package/package.json +1 -1

package/dist/agent/llm.js CHANGED Viewed

@@ -7,6 +7,7 @@ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, creat
 import { USER_AGENT } from '../config.js';
 import { routeRequest, parseRoutingProfile } from '../router/index.js';
 import { ThinkTagStripper } from './think-tag-stripper.js';
+import { isNemotronProseModel, stripNemotronProse } from './nemotron-prose-stripper.js';
 function parseTimeoutEnv(name) {
     const raw = process.env[name];
     const parsed = raw ? Number.parseInt(raw, 10) : NaN;
@@ -420,6 +421,7 @@ export class ModelClient {
         let currentToolName = '';
         let currentToolInput = '';
         const textEmission = { mode: 'undecided' };
+        const isNemotronProse = isNemotronProseModel(request.model);
         // Split inline <think>…</think> emitted by reasoning models (nemotron,
         // deepseek-r1, qwq, etc.) that use the text field instead of the native
         // thinking block. Thinking emitted this way is display-only — we don't
@@ -439,7 +441,9 @@ export class ModelClient {
                 const trimmed = currentText.trimStart();
                 if (!trimmed)
                     return;
-                textEmission.mode = trimmed.startsWith('{') ? 'hold' : 'stream';
+                // Nemotron Omni leaks reasoning prose into the text channel without
+                // <think> tags. Hold the buffer for end-of-stream stripping.
+                textEmission.mode = isNemotronProse || trimmed.startsWith('{') ? 'hold' : 'stream';
                 if (textEmission.mode === 'stream') {
                     onStreamDelta?.({ type: 'text', text: currentText });
                 }
@@ -585,6 +589,13 @@ export class ModelClient {
                                         'Treating it as non-productive output so recovery can try another model.');
                                 }
                             }
+                            else if (textEmission.mode === 'hold' && isNemotronProse) {
+                                const { thinking, answer } = stripNemotronProse(currentText);
+                                if (thinking)
+                                    onStreamDelta?.({ type: 'thinking', text: thinking });
+                                onStreamDelta?.({ type: 'text', text: answer });
+                                collected.push({ type: 'text', text: answer });
+                            }
                             else {
                                 if (textEmission.mode !== 'stream') {
                                     onStreamDelta?.({ type: 'text', text: currentText });
@@ -646,6 +657,13 @@ export class ModelClient {
                         'Treating it as non-productive output so recovery can try another model.');
                 }
             }
+            else if (textEmission.mode === 'hold' && isNemotronProse) {
+                const { thinking, answer } = stripNemotronProse(currentText);
+                if (thinking)
+                    onStreamDelta?.({ type: 'thinking', text: thinking });
+                onStreamDelta?.({ type: 'text', text: answer });
+                collected.push({ type: 'text', text: answer });
+            }
             else {
                 if (textEmission.mode !== 'stream') {
                     onStreamDelta?.({ type: 'text', text: currentText });

package/dist/agent/nemotron-prose-stripper.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+/**
+ * Strip leaked reasoning prose from Nemotron-family models.
+ *
+ * NVIDIA's Nemotron Omni reasoning model emits its chain of thought as plain
+ * text — without `<think>` tags or a separate reasoning_content channel — so
+ * the think-tag stripper can't catch it. The reasoning prose is then concatenated
+ * directly with the answer (often without even a separator), e.g.:
+ *
+ *   "The user asks: ... According to instructions, we must obey. Just output
+ *    the tokenOMNI_E2E_OK"
+ *
+ * This module detects the reasoning preamble (heuristic: leading sentence
+ * matches a known meta-reasoning opener) and strips everything up to and
+ * including the last "answer-introducer" phrase ("just output the token",
+ * "the answer is:", "output:", etc.). The stripped portion is returned as
+ * `thinking` so it can be routed to the thinking display channel; the
+ * remainder is the user-facing `answer`.
+ */
+export declare function isNemotronProseModel(model: string): boolean; // True when the model id selects the Nemotron prose-stripping path.
+export declare function stripNemotronProse(text: string): { // Splits raw model text into a reasoning part and a user-facing part.
+    thinking: string; // Stripped reasoning preamble; empty string when nothing was stripped.
+    answer: string; // Text to show the user; the original text when the heuristic does not apply.
+};

package/dist/agent/nemotron-prose-stripper.js ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Strip leaked reasoning prose from Nemotron-family models.
+ *
+ * NVIDIA's Nemotron Omni reasoning model emits its chain of thought as plain
+ * text — without `<think>` tags or a separate reasoning_content channel — so
+ * the think-tag stripper can't catch it. The reasoning prose is then concatenated
+ * directly with the answer (often without even a separator), e.g.:
+ *
+ *   "The user asks: ... According to instructions, we must obey. Just output
+ *    the tokenOMNI_E2E_OK"
+ *
+ * This module detects the reasoning preamble (heuristic: leading sentence
+ * matches a known meta-reasoning opener) and strips everything up to and
+ * including the last "answer-introducer" phrase ("just output the token",
+ * "the answer is:", "output:", etc.). The stripped portion is returned as
+ * `thinking` so it can be routed to the thinking display channel; the
+ * remainder is the user-facing `answer`.
+ */
+const REASONING_OPENERS = [ // Start-anchored, case-insensitive patterns; stripNemotronProse only strips when one of them matches the trimmed text.
+    /^the user (asks|wants|says|requested|is asking|wants me|wrote|just|said)/i,
+    /^looking at (this|the)/i,
+    /^based on (the|this)/i,
+    /^according to/i,
+    /^we (must|should|need)/i,
+    /^i (need|should|must|will|'ll|am going to|have to)\s/i,
+    /^let me/i,
+    /^there'?s? no need/i,
+    /^okay,?\s+(the user|so|let|i)/i,
+    /^alright,?\s+(the user|so|let|i)/i,
+    /^so,?\s+the user/i,
+    /^the question (is|asks)/i,
+    /^the prompt (is|says|asks)/i,
+];
+const ANSWER_INTRODUCERS = [ // Global, case-insensitive transition phrases; stripNemotronProse cuts at the end of the furthest match across all of them.
+    /\bjust\s+(?:output|respond|say|reply|return|emit|write|give|print)\s+(?:the|a|with|out|to|exactly|back|only)?\s*(?:token|word|answer|response|string|text|output|message)?\s*:?\s*/gi,
+    /\b(?:the|my)\s+(?:answer|response|token|output|reply)\s+is\s*:?\s*/gi,
+    /\bhere'?s?\s+(?:the|my)?\s*(?:response|answer|output|token|reply):?\s*/gi,
+    /(?:^|[\s.])(?:output|response|answer|reply|token)\s*:\s*/gi,
+    /\bi(?:'ll| will| shall)\s+(?:output|respond|say|reply|return|emit|write|give|print)\s+(?:the|a|with|out|to|exactly|back|only)?\s*(?:token|word|answer|response|string|text|output|message)?\s*:?\s*/gi,
+];
+export function isNemotronProseModel(model) { // Reports whether the given model id belongs to the family handled by stripNemotronProse.
+    return /^nvidia\/nemotron-3-nano-omni/i.test(model); // Case-insensitive prefix match, so any suffix after the family name also qualifies.
+}
+export function stripNemotronProse(text) { // Splits text into thinking (leaked reasoning preamble) and answer; every fallback path returns an empty thinking string.
+    if (!text) // Empty or missing input: both parts are empty strings.
+        return { thinking: '', answer: '' };
+    const leadingWhitespaceMatch = text.match(/^\s*/); // This pattern can match the empty string, so the ternary below is a defensive fallback.
+    const leadingWhitespace = leadingWhitespaceMatch ? leadingWhitespaceMatch[0] : '';
+    const trimmed = text.slice(leadingWhitespace.length); // Text without its leading whitespace; all offsets below are relative to this string.
+    if (!trimmed) // Whitespace-only input is passed through untouched.
+        return { thinking: '', answer: text };
+    // Reject early: if no reasoning opener at the start, this isn't leaked prose.
+    if (!REASONING_OPENERS.some((p) => p.test(trimmed))) {
+        return { thinking: '', answer: text };
+    }
+    let lastEnd = -1; // End offset of the furthest answer-introducer match; -1 means none was found.
+    for (const re of ANSWER_INTRODUCERS) {
+        const matches = [...trimmed.matchAll(re)]; // matchAll needs the global flag, which every introducer pattern carries.
+        for (const m of matches) {
+            const end = (m.index ?? 0) + m[0].length; // Offset just past this match.
+            if (end > lastEnd) // Keep the maximum across all patterns and all matches.
+                lastEnd = end;
+        }
+    }
+    if (lastEnd === -1) {
+        // Reasoning detected but no transition phrase found. Conservative: leave
+        // the text intact rather than swallow what might be a legitimate answer.
+        return { thinking: '', answer: text };
+    }
+    const thinking = leadingWhitespace + trimmed.slice(0, lastEnd); // Everything up to and including the last introducer, with the original leading whitespace restored.
+    const answer = trimmed.slice(lastEnd).replace(/^[\s.,:;\-—]+/, ''); // Remainder with leading whitespace and separator punctuation removed.
+    // Don't return an empty answer — fall back to the original text so the user
+    // gets *something* even if our heuristic over-stripped.
+    if (!answer)
+        return { thinking: '', answer: text };
+    return { thinking, answer };
+}

package/dist/tools/imagegen.js CHANGED Viewed

@@ -118,7 +118,7 @@ function buildExecute(deps) {
             };
         }
         let imageModel = model || (referenceImage ? 'openai/gpt-image-2' : 'openai/gpt-image-1');
-        const imageSize = size || '1024x1024';
+        let imageSize = size || '1024x1024';
         let chosenPrompt = prompt;
         // Skip the proposal flow when a reference image is set: the media router
         // doesn't know which models support image-to-image, so its suggestions
@@ -171,6 +171,12 @@ function buildExecute(deps) {
                 // Router / AskUser failed — fall back to default model silently.
             }
         }
+        // gpt-image-2 reliably serves 1024x1024 only — other sizes time out at
+        // the gateway. Force the supported size regardless of caller / router
+        // input so we never burn USDC on a request that's going to abort.
+        if (imageModel === 'openai/gpt-image-2' && imageSize !== '1024x1024') {
+            imageSize = '1024x1024';
+        }
         if (contentId && deps.library) {
             const decision = checkImageBudget(deps.library, contentId, imageModel, imageSize);
             if (!decision.ok) {
@@ -427,7 +433,7 @@ export function createImageGenCapability(deps = {}) {
                 properties: {
                     prompt: { type: 'string', description: 'Text description of the image to generate' },
                     output_path: { type: 'string', description: 'Where to save the image. Default: generated-<timestamp>.png in working directory' },
-                    size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024' },
+                    size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024. Note: openai/gpt-image-2 is forced to 1024x1024 (other sizes time out at the gateway).' },
                     model: { type: 'string', description: 'Image model to use. Default: openai/gpt-image-1' },
                     image_url: { type: 'string', description: 'Optional reference image (image-to-image / style transfer). Accepts an http(s) URL, a data URI, or a local file path. Only works with edit-capable models.' },
                     contentId: { type: 'string', description: 'Optional Content id to attach this generation to. Pre-flight budget check + auto-record on success.' },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.9.4",
+  "version": "3.9.5",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {