@lobehub/lobehub 2.0.0-next.115 → 2.0.0-next.116

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. package/CHANGELOG.md +25 -0
  2. package/changelog/v1.json +9 -0
  3. package/package.json +1 -1
  4. package/packages/context-engine/src/processors/MessageContent.ts +100 -6
  5. package/packages/context-engine/src/processors/__tests__/MessageContent.test.ts +239 -0
  6. package/packages/fetch-sse/src/fetchSSE.ts +30 -0
  7. package/packages/model-runtime/src/core/contextBuilders/google.test.ts +78 -24
  8. package/packages/model-runtime/src/core/contextBuilders/google.ts +10 -2
  9. package/packages/model-runtime/src/core/streams/google/google-ai.test.ts +451 -20
  10. package/packages/model-runtime/src/core/streams/google/index.ts +113 -3
  11. package/packages/model-runtime/src/core/streams/protocol.ts +19 -0
  12. package/packages/types/src/message/common/base.ts +26 -0
  13. package/packages/types/src/message/common/metadata.ts +7 -0
  14. package/packages/utils/src/index.ts +1 -0
  15. package/packages/utils/src/multimodalContent.ts +25 -0
  16. package/src/components/Thinking/index.tsx +3 -3
  17. package/src/features/ChatList/Messages/Assistant/DisplayContent.tsx +44 -0
  18. package/src/features/ChatList/Messages/Assistant/MessageBody.tsx +96 -0
  19. package/src/features/ChatList/Messages/Assistant/Reasoning/index.tsx +26 -13
  20. package/src/features/ChatList/Messages/Assistant/index.tsx +8 -6
  21. package/src/features/ChatList/Messages/Default.tsx +4 -7
  22. package/src/features/ChatList/components/RichContentRenderer.tsx +35 -0
  23. package/src/store/chat/slices/aiChat/actions/streamingExecutor.ts +244 -17
  24. package/src/features/ChatList/Messages/Assistant/MessageContent.tsx +0 -78
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
  
  # Changelog
  
+ ## [Version 2.0.0-next.116](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.115...v2.0.0-next.116)
+
+ <sup>Released on **2025-11-25**</sup>
+
+ #### ✨ Features
+
+ - **misc**: Support nano banana pro.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's improved
+
+ - **misc**: Support nano banana pro, closes [#10413](https://github.com/lobehub/lobe-chat/issues/10413) ([a93cfcd](https://github.com/lobehub/lobe-chat/commit/a93cfcd))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ## [Version 2.0.0-next.115](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.114...v2.0.0-next.115)
  
  <sup>Released on **2025-11-25**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
  [
+   {
+     "children": {
+       "features": [
+         "Support nano banana pro."
+       ]
+     },
+     "date": "2025-11-25",
+     "version": "2.0.0-next.116"
+   },
    {
      "children": {
        "features": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@lobehub/lobehub",
-   "version": "2.0.0-next.115",
+   "version": "2.0.0-next.116",
    "description": "LobeHub - an open-source,comprehensive AI Agent framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
    "keywords": [
      "framework",
package/packages/context-engine/src/processors/MessageContent.ts CHANGED
@@ -1,4 +1,5 @@
  import { filesPrompts } from '@lobechat/prompts';
+ import { MessageContentPart } from '@lobechat/types';
  import { imageUrlToBase64 } from '@lobechat/utils/imageToBase64';
  import { parseDataUri } from '@lobechat/utils/uriParser';
  import { isDesktopLocalStaticServerUrl } from '@lobechat/utils/url';
@@ -9,6 +10,23 @@ import type { PipelineContext, ProcessorOptions } from '../types';
  
  const log = debug('context-engine:processor:MessageContentProcessor');
  
+ /**
+  * Deserialize content string to message content parts
+  * Returns null if content is not valid JSON array of parts
+  */
+ const deserializeParts = (content: string): MessageContentPart[] | null => {
+   try {
+     const parsed = JSON.parse(content);
+     // Validate it's an array with valid part structure
+     if (Array.isArray(parsed) && parsed.length > 0 && parsed[0]?.type) {
+       return parsed as MessageContentPart[];
+     }
+   } catch {
+     // Not JSON, treat as plain text
+   }
+   return null;
+ };
+
  export interface FileContextConfig {
    /** Whether to enable file context injection */
    enabled?: boolean;
@@ -30,6 +48,7 @@ export interface MessageContentConfig {
  }
  
  export interface UserMessageContentPart {
+   googleThoughtSignature?: string;
    image_url?: {
      detail?: string;
      url: string;
@@ -213,7 +232,7 @@ export class MessageContentProcessor extends BaseProcessor {
     * Process assistant message content
     */
    private async processAssistantMessage(message: any): Promise<any> {
-     // Check if there is reasoning content (thinking mode)
+     // Priority 1: Check if there is reasoning content with signature (thinking mode)
      const shouldIncludeThinking = message.reasoning && !!message.reasoning?.signature;
  
      if (shouldIncludeThinking) {
@@ -235,7 +254,59 @@ export class MessageContentProcessor extends BaseProcessor {
        };
      }
  
-     // Check if there are images (assistant messages may also contain images)
+     // Priority 2: Check if reasoning content is multimodal
+     const hasMultimodalReasoning = message.reasoning?.isMultimodal && message.reasoning?.content;
+
+     if (hasMultimodalReasoning) {
+       const reasoningParts = deserializeParts(message.reasoning.content);
+       if (reasoningParts) {
+         // Convert reasoning multimodal parts to plain text
+         const reasoningText = reasoningParts
+           .map((part) => {
+             if (part.type === 'text') return part.text;
+             if (part.type === 'image') return `[Image: ${part.image}]`;
+             return '';
+           })
+           .join('\n');
+
+         // Update reasoning to plain text
+         const updatedMessage = {
+           ...message,
+           reasoning: {
+             ...message.reasoning,
+             content: reasoningText,
+             isMultimodal: false, // Convert to non-multimodal
+           },
+         };
+
+         // Handle main content based on whether it's multimodal
+         if (message.metadata?.isMultimodal && message.content) {
+           const contentParts = deserializeParts(message.content);
+           if (contentParts) {
+             const convertedParts = this.convertMessagePartsToContentParts(contentParts);
+             return {
+               ...updatedMessage,
+               content: convertedParts,
+             };
+           }
+         }
+
+         return updatedMessage;
+       }
+     }
+
+     // Priority 3: Check if message content is multimodal
+     const hasMultimodalContent = message.metadata?.isMultimodal && message.content;
+
+     if (hasMultimodalContent) {
+       const parts = deserializeParts(message.content);
+       if (parts) {
+         const contentParts = this.convertMessagePartsToContentParts(parts);
+         return { ...message, content: contentParts };
+       }
+     }
+
+     // Priority 4: Check if there are images (legacy imageList field)
      const hasImages = message.imageList && message.imageList.length > 0;
  
      if (hasImages && this.config.isCanUseVision?.(this.config.model, this.config.provider)) {
@@ -253,10 +324,7 @@ export class MessageContentProcessor extends BaseProcessor {
        const imageContentParts = await this.processImageList(message.imageList || []);
        contentParts.push(...imageContentParts);
  
-       return {
-         ...message,
-         content: contentParts,
-       };
+       return { ...message, content: contentParts };
      }
  
      // Regular assistant message, return plain text content
@@ -266,6 +334,32 @@ export class MessageContentProcessor extends BaseProcessor {
      };
    }
  
+   /**
+    * Convert MessageContentPart[] (internal format) to OpenAI-compatible UserMessageContentPart[]
+    */
+   private convertMessagePartsToContentParts(parts: MessageContentPart[]): UserMessageContentPart[] {
+     const contentParts: UserMessageContentPart[] = [];
+
+     for (const part of parts) {
+       if (part.type === 'text') {
+         contentParts.push({
+           googleThoughtSignature: part.thoughtSignature,
+           text: part.text,
+           type: 'text',
+         });
+       } else if (part.type === 'image') {
+         // Images are already in S3 URL format, no conversion needed
+         contentParts.push({
+           googleThoughtSignature: part.thoughtSignature,
+           image_url: { detail: 'auto', url: part.image },
+           type: 'image_url',
+         });
+       }
+     }
+
+     return contentParts;
+   }
+
    /**
     * Process image list
     */
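
For orientation, the sketch below traces the round trip these hunks implement: a multimodal assistant message stored as a JSON-serialized parts array, guarded by a `deserializeParts`-style check, then mapped to OpenAI-style `text` / `image_url` parts with `googleThoughtSignature` carried along. The `Part` and `ContentPart` types are simplified local stand-ins for the package's `MessageContentPart` and `UserMessageContentPart`, so treat this as an approximation rather than the shipped implementation.

```ts
// Simplified stand-ins for the package's MessageContentPart / UserMessageContentPart types.
type Part =
  | { type: 'text'; text: string; thoughtSignature?: string }
  | { type: 'image'; image: string; thoughtSignature?: string };

type ContentPart =
  | { type: 'text'; text: string; googleThoughtSignature?: string }
  | { type: 'image_url'; image_url: { detail?: string; url: string }; googleThoughtSignature?: string };

// Same guard idea as deserializeParts: only treat the string as parts when it
// parses to a non-empty array whose first element has a `type` field.
const deserializeParts = (content: string): Part[] | null => {
  try {
    const parsed = JSON.parse(content);
    if (Array.isArray(parsed) && parsed.length > 0 && parsed[0]?.type) return parsed as Part[];
  } catch {
    // not JSON, caller keeps the plain-text content as-is
  }
  return null;
};

// Mirrors convertMessagePartsToContentParts: map internal parts to OpenAI-style parts.
const toContentParts = (parts: Part[]): ContentPart[] =>
  parts.map((part) =>
    part.type === 'text'
      ? { googleThoughtSignature: part.thoughtSignature, text: part.text, type: 'text' as const }
      : {
          googleThoughtSignature: part.thoughtSignature,
          image_url: { detail: 'auto', url: part.image },
          type: 'image_url' as const,
        },
  );

// A message persisted the way the new metadata.isMultimodal path expects.
const stored = JSON.stringify([
  { type: 'text', text: 'Here is an image:', thoughtSignature: 'sig-001' },
  { type: 'image', image: 'https://s3.example.com/image.png' },
]);

const parts = deserializeParts(stored);
if (parts) console.log(toContentParts(parts));
// -> [{ type: 'text', text: 'Here is an image:', googleThoughtSignature: 'sig-001' },
//     { type: 'image_url', image_url: { detail: 'auto', url: '...' }, ... }]
```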
package/packages/context-engine/src/processors/__tests__/MessageContent.test.ts CHANGED
@@ -566,4 +566,243 @@ describe('MessageContentProcessor', () => {
        expect(content[2].video_url.url).toBe('http://example.com/video.mp4');
      });
    });
+
+   describe('Multimodal message content processing', () => {
+     it('should convert assistant message with metadata.isMultimodal to OpenAI format', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: JSON.stringify([
+             { type: 'text', text: 'Here is an image:' },
+             { type: 'image', image: 'https://s3.example.com/image.png' },
+             { type: 'text', text: 'What do you think?' },
+           ]),
+           metadata: {
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: [
+           { type: 'text', text: 'Here is an image:' },
+           {
+             type: 'image_url',
+             image_url: { detail: 'auto', url: 'https://s3.example.com/image.png' },
+           },
+           { type: 'text', text: 'What do you think?' },
+         ],
+       });
+     });
+
+     it('should convert assistant message with reasoning.isMultimodal to plain text', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: 'The answer is correct.',
+           reasoning: {
+             content: JSON.stringify([
+               { type: 'text', text: 'Let me analyze this image:' },
+               { type: 'image', image: 'https://s3.example.com/reasoning-image.png' },
+               { type: 'text', text: 'Based on the analysis...' },
+             ]),
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         reasoning: {
+           content:
+             'Let me analyze this image:\n[Image: https://s3.example.com/reasoning-image.png]\nBased on the analysis...',
+           isMultimodal: false,
+         },
+         content: 'The answer is correct.',
+       });
+     });
+
+     it('should handle both reasoning.isMultimodal and metadata.isMultimodal', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: JSON.stringify([
+             { type: 'text', text: 'Final result:' },
+             { type: 'image', image: 'https://s3.example.com/result.png' },
+           ]),
+           metadata: {
+             isMultimodal: true,
+           },
+           reasoning: {
+             content: JSON.stringify([
+               { type: 'text', text: 'Thinking about:' },
+               { type: 'image', image: 'https://s3.example.com/thinking.png' },
+             ]),
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         reasoning: {
+           content: 'Thinking about:\n[Image: https://s3.example.com/thinking.png]',
+           isMultimodal: false,
+         },
+         content: [
+           { type: 'text', text: 'Final result:' },
+           {
+             type: 'image_url',
+             image_url: { detail: 'auto', url: 'https://s3.example.com/result.png' },
+           },
+         ],
+       });
+     });
+
+     it('should prioritize reasoning.signature over reasoning.isMultimodal', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: 'The answer.',
+           reasoning: {
+             content: 'Some thinking process',
+             signature: 'sig123',
+             // Even if isMultimodal is true, signature takes priority
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: [
+           {
+             type: 'thinking',
+             thinking: 'Some thinking process',
+             signature: 'sig123',
+           },
+           { type: 'text', text: 'The answer.' },
+         ],
+       });
+     });
+
+     it('should handle plain text when isMultimodal is true but content is not valid JSON', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: 'This is plain text, not JSON',
+           metadata: {
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: 'This is plain text, not JSON',
+       });
+     });
+
+     it('should preserve thoughtSignature in multimodal content parts', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: JSON.stringify([
+             { type: 'text', text: 'Analysis result:', thoughtSignature: 'sig-001' },
+             { type: 'image', image: 'https://s3.example.com/chart.png', thoughtSignature: 'sig-002' },
+             { type: 'text', text: 'Conclusion' },
+           ]),
+           metadata: {
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: [
+           { type: 'text', text: 'Analysis result:', googleThoughtSignature: 'sig-001' },
+           {
+             type: 'image_url',
+             image_url: { detail: 'auto', url: 'https://s3.example.com/chart.png' },
+             googleThoughtSignature: 'sig-002',
+           },
+           { type: 'text', text: 'Conclusion' },
+         ],
+       });
+     });
+   });
  });
package/packages/fetch-sse/src/fetchSSE.ts CHANGED
@@ -71,6 +71,22 @@ export interface MessageGroundingChunk {
    type: 'grounding';
  }
  
+ export interface MessageReasoningPartChunk {
+   content: string;
+   mimeType?: string;
+   partType: 'text' | 'image';
+   thoughtSignature?: string;
+   type: 'reasoning_part';
+ }
+
+ export interface MessageContentPartChunk {
+   content: string;
+   mimeType?: string;
+   partType: 'text' | 'image';
+   thoughtSignature?: string;
+   type: 'content_part';
+ }
+
  interface MessageToolCallsChunk {
    isAnimationActives?: boolean[];
    tool_calls: MessageToolCall[];
@@ -87,6 +103,8 @@ export interface FetchSSEOptions {
      | MessageTextChunk
      | MessageToolCallsChunk
      | MessageReasoningChunk
+     | MessageReasoningPartChunk
+     | MessageContentPartChunk
      | MessageGroundingChunk
      | MessageUsageChunk
      | MessageBase64ImageChunk
@@ -420,6 +438,18 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
          break;
        }
  
+       case 'reasoning_part':
+       case 'content_part': {
+         options.onMessageHandle?.({
+           content: data.content,
+           mimeType: data.mimeType,
+           partType: data.partType,
+           thoughtSignature: data.thoughtSignature,
+           type: ev.event,
+         });
+         break;
+       }
+
        case 'tool_calls': {
          // get finial
          // if there is no tool calls, we should initialize the tool calls
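
For context, here is a minimal sketch of how a caller of `fetchSSE` might consume the new `reasoning_part` / `content_part` events through an `onMessageHandle`-style callback. The chunk shape mirrors the `MessageReasoningPartChunk` / `MessageContentPartChunk` interfaces added above; the buffering logic and the assumption about what `content` holds for image parts are illustrative only and are not the package's actual handler.

```ts
// Simplified local copy of the chunk shapes added to fetchSSE.ts above.
interface PartChunk {
  content: string;
  mimeType?: string;
  partType: 'text' | 'image';
  thoughtSignature?: string;
  type: 'reasoning_part' | 'content_part';
}

// Illustrative consumer: accumulate streamed parts into separate reasoning / content buffers.
const reasoningParts: PartChunk[] = [];
const contentParts: PartChunk[] = [];

const onMessageHandle = (chunk: PartChunk) => {
  const target = chunk.type === 'reasoning_part' ? reasoningParts : contentParts;
  target.push(chunk);

  if (chunk.partType === 'image') {
    // Assumption: for image parts, `content` carries the image payload (a URL or inline data)
    // and `mimeType` describes it; this diff alone does not confirm the exact payload format.
    console.log('image part', chunk.mimeType, chunk.content.slice(0, 40));
  } else {
    console.log('text part', chunk.content);
  }
};

// Example: the kind of chunk the new `reasoning_part` / `content_part` SSE cases forward.
onMessageHandle({
  content: 'Analyzing the chart...',
  partType: 'text',
  thoughtSignature: 'sig-001',
  type: 'reasoning_part',
});
```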