@lobehub/lobehub 2.0.0-next.115 → 2.0.0-next.116

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. package/CHANGELOG.md +25 -0
  2. package/changelog/v1.json +9 -0
  3. package/package.json +1 -1
  4. package/packages/context-engine/src/processors/MessageContent.ts +100 -6
  5. package/packages/context-engine/src/processors/__tests__/MessageContent.test.ts +239 -0
  6. package/packages/fetch-sse/src/fetchSSE.ts +30 -0
  7. package/packages/model-runtime/src/core/contextBuilders/google.test.ts +78 -24
  8. package/packages/model-runtime/src/core/contextBuilders/google.ts +10 -2
  9. package/packages/model-runtime/src/core/streams/google/google-ai.test.ts +451 -20
  10. package/packages/model-runtime/src/core/streams/google/index.ts +113 -3
  11. package/packages/model-runtime/src/core/streams/protocol.ts +19 -0
  12. package/packages/types/src/message/common/base.ts +26 -0
  13. package/packages/types/src/message/common/metadata.ts +7 -0
  14. package/packages/utils/src/index.ts +1 -0
  15. package/packages/utils/src/multimodalContent.ts +25 -0
  16. package/src/components/Thinking/index.tsx +3 -3
  17. package/src/features/ChatList/Messages/Assistant/DisplayContent.tsx +44 -0
  18. package/src/features/ChatList/Messages/Assistant/MessageBody.tsx +96 -0
  19. package/src/features/ChatList/Messages/Assistant/Reasoning/index.tsx +26 -13
  20. package/src/features/ChatList/Messages/Assistant/index.tsx +8 -6
  21. package/src/features/ChatList/Messages/Default.tsx +4 -7
  22. package/src/features/ChatList/components/RichContentRenderer.tsx +35 -0
  23. package/src/store/chat/slices/aiChat/actions/streamingExecutor.ts +244 -17
  24. package/src/features/ChatList/Messages/Assistant/MessageContent.tsx +0 -78
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
  
  # Changelog
  
+ ## [Version 2.0.0-next.116](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.115...v2.0.0-next.116)
+
+ <sup>Released on **2025-11-25**</sup>
+
+ #### ✨ Features
+
+ - **misc**: Support nano banana pro.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's improved
+
+ - **misc**: Support nano banana pro, closes [#10413](https://github.com/lobehub/lobe-chat/issues/10413) ([a93cfcd](https://github.com/lobehub/lobe-chat/commit/a93cfcd))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ## [Version 2.0.0-next.115](https://github.com/lobehub/lobe-chat/compare/v2.0.0-next.114...v2.0.0-next.115)
  
  <sup>Released on **2025-11-25**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
  [
+   {
+     "children": {
+       "features": [
+         "Support nano banana pro."
+       ]
+     },
+     "date": "2025-11-25",
+     "version": "2.0.0-next.116"
+   },
    {
      "children": {
        "features": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@lobehub/lobehub",
-   "version": "2.0.0-next.115",
+   "version": "2.0.0-next.116",
    "description": "LobeHub - an open-source,comprehensive AI Agent framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
    "keywords": [
      "framework",
package/packages/context-engine/src/processors/MessageContent.ts CHANGED
@@ -1,4 +1,5 @@
  import { filesPrompts } from '@lobechat/prompts';
+ import { MessageContentPart } from '@lobechat/types';
  import { imageUrlToBase64 } from '@lobechat/utils/imageToBase64';
  import { parseDataUri } from '@lobechat/utils/uriParser';
  import { isDesktopLocalStaticServerUrl } from '@lobechat/utils/url';
@@ -9,6 +10,23 @@ import type { PipelineContext, ProcessorOptions } from '../types';
  
  const log = debug('context-engine:processor:MessageContentProcessor');
  
+ /**
+  * Deserialize content string to message content parts
+  * Returns null if content is not valid JSON array of parts
+  */
+ const deserializeParts = (content: string): MessageContentPart[] | null => {
+   try {
+     const parsed = JSON.parse(content);
+     // Validate it's an array with valid part structure
+     if (Array.isArray(parsed) && parsed.length > 0 && parsed[0]?.type) {
+       return parsed as MessageContentPart[];
+     }
+   } catch {
+     // Not JSON, treat as plain text
+   }
+   return null;
+ };
+
  export interface FileContextConfig {
    /** Whether to enable file context injection */
    enabled?: boolean;
@@ -30,6 +48,7 @@ export interface MessageContentConfig {
  }
  
  export interface UserMessageContentPart {
+   googleThoughtSignature?: string;
    image_url?: {
      detail?: string;
      url: string;
@@ -213,7 +232,7 @@ export class MessageContentProcessor extends BaseProcessor {
     * Process assistant message content
     */
    private async processAssistantMessage(message: any): Promise<any> {
-     // Check if there is reasoning content (thinking mode)
+     // Priority 1: Check if there is reasoning content with signature (thinking mode)
      const shouldIncludeThinking = message.reasoning && !!message.reasoning?.signature;
  
      if (shouldIncludeThinking) {
@@ -235,7 +254,59 @@ export class MessageContentProcessor extends BaseProcessor {
        };
      }
  
-     // Check if there are images (assistant messages may also contain images)
+     // Priority 2: Check if reasoning content is multimodal
+     const hasMultimodalReasoning = message.reasoning?.isMultimodal && message.reasoning?.content;
+
+     if (hasMultimodalReasoning) {
+       const reasoningParts = deserializeParts(message.reasoning.content);
+       if (reasoningParts) {
+         // Convert reasoning multimodal parts to plain text
+         const reasoningText = reasoningParts
+           .map((part) => {
+             if (part.type === 'text') return part.text;
+             if (part.type === 'image') return `[Image: ${part.image}]`;
+             return '';
+           })
+           .join('\n');
+
+         // Update reasoning to plain text
+         const updatedMessage = {
+           ...message,
+           reasoning: {
+             ...message.reasoning,
+             content: reasoningText,
+             isMultimodal: false, // Convert to non-multimodal
+           },
+         };
+
+         // Handle main content based on whether it's multimodal
+         if (message.metadata?.isMultimodal && message.content) {
+           const contentParts = deserializeParts(message.content);
+           if (contentParts) {
+             const convertedParts = this.convertMessagePartsToContentParts(contentParts);
+             return {
+               ...updatedMessage,
+               content: convertedParts,
+             };
+           }
+         }
+
+         return updatedMessage;
+       }
+     }
+
+     // Priority 3: Check if message content is multimodal
+     const hasMultimodalContent = message.metadata?.isMultimodal && message.content;
+
+     if (hasMultimodalContent) {
+       const parts = deserializeParts(message.content);
+       if (parts) {
+         const contentParts = this.convertMessagePartsToContentParts(parts);
+         return { ...message, content: contentParts };
+       }
+     }
+
+     // Priority 4: Check if there are images (legacy imageList field)
      const hasImages = message.imageList && message.imageList.length > 0;
  
      if (hasImages && this.config.isCanUseVision?.(this.config.model, this.config.provider)) {
@@ -253,10 +324,7 @@ export class MessageContentProcessor extends BaseProcessor {
        const imageContentParts = await this.processImageList(message.imageList || []);
        contentParts.push(...imageContentParts);
  
-       return {
-         ...message,
-         content: contentParts,
-       };
+       return { ...message, content: contentParts };
      }
  
      // Regular assistant message, return plain text content
@@ -266,6 +334,32 @@ export class MessageContentProcessor extends BaseProcessor {
      };
    }
  
+   /**
+    * Convert MessageContentPart[] (internal format) to OpenAI-compatible UserMessageContentPart[]
+    */
+   private convertMessagePartsToContentParts(parts: MessageContentPart[]): UserMessageContentPart[] {
+     const contentParts: UserMessageContentPart[] = [];
+
+     for (const part of parts) {
+       if (part.type === 'text') {
+         contentParts.push({
+           googleThoughtSignature: part.thoughtSignature,
+           text: part.text,
+           type: 'text',
+         });
+       } else if (part.type === 'image') {
+         // Images are already in S3 URL format, no conversion needed
+         contentParts.push({
+           googleThoughtSignature: part.thoughtSignature,
+           image_url: { detail: 'auto', url: part.image },
+           type: 'image_url',
+         });
+       }
+     }
+
+     return contentParts;
+   }
+
    /**
     * Process image list
     */
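
For orientation, the sketch below traces the round trip these hunks implement: a multimodal assistant message stored as a JSON-serialized parts array, guarded by a `deserializeParts`-style check, then mapped to OpenAI-style `text` / `image_url` parts with `googleThoughtSignature` carried along. The `Part` and `ContentPart` types are simplified local stand-ins for the package's `MessageContentPart` and `UserMessageContentPart`, so treat this as an approximation rather than the shipped implementation.

```ts
// Simplified stand-ins for the package's MessageContentPart / UserMessageContentPart types.
type Part =
  | { type: 'text'; text: string; thoughtSignature?: string }
  | { type: 'image'; image: string; thoughtSignature?: string };

type ContentPart =
  | { type: 'text'; text: string; googleThoughtSignature?: string }
  | { type: 'image_url'; image_url: { detail?: string; url: string }; googleThoughtSignature?: string };

// Same guard idea as deserializeParts: only treat the string as parts when it
// parses to a non-empty array whose first element has a `type` field.
const deserializeParts = (content: string): Part[] | null => {
  try {
    const parsed = JSON.parse(content);
    if (Array.isArray(parsed) && parsed.length > 0 && parsed[0]?.type) return parsed as Part[];
  } catch {
    // not JSON, caller keeps the plain-text content as-is
  }
  return null;
};

// Mirrors convertMessagePartsToContentParts: map internal parts to OpenAI-style parts.
const toContentParts = (parts: Part[]): ContentPart[] =>
  parts.map((part) =>
    part.type === 'text'
      ? { googleThoughtSignature: part.thoughtSignature, text: part.text, type: 'text' as const }
      : {
          googleThoughtSignature: part.thoughtSignature,
          image_url: { detail: 'auto', url: part.image },
          type: 'image_url' as const,
        },
  );

// A message persisted the way the new metadata.isMultimodal path expects.
const stored = JSON.stringify([
  { type: 'text', text: 'Here is an image:', thoughtSignature: 'sig-001' },
  { type: 'image', image: 'https://s3.example.com/image.png' },
]);

const parts = deserializeParts(stored);
if (parts) console.log(toContentParts(parts));
// -> [{ type: 'text', text: 'Here is an image:', googleThoughtSignature: 'sig-001' },
//     { type: 'image_url', image_url: { detail: 'auto', url: '...' }, ... }]
```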
package/packages/context-engine/src/processors/__tests__/MessageContent.test.ts CHANGED
@@ -566,4 +566,243 @@ describe('MessageContentProcessor', () => {
        expect(content[2].video_url.url).toBe('http://example.com/video.mp4');
      });
    });
+
+   describe('Multimodal message content processing', () => {
+     it('should convert assistant message with metadata.isMultimodal to OpenAI format', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: JSON.stringify([
+             { type: 'text', text: 'Here is an image:' },
+             { type: 'image', image: 'https://s3.example.com/image.png' },
+             { type: 'text', text: 'What do you think?' },
+           ]),
+           metadata: {
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: [
+           { type: 'text', text: 'Here is an image:' },
+           {
+             type: 'image_url',
+             image_url: { detail: 'auto', url: 'https://s3.example.com/image.png' },
+           },
+           { type: 'text', text: 'What do you think?' },
+         ],
+       });
+     });
+
+     it('should convert assistant message with reasoning.isMultimodal to plain text', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: 'The answer is correct.',
+           reasoning: {
+             content: JSON.stringify([
+               { type: 'text', text: 'Let me analyze this image:' },
+               { type: 'image', image: 'https://s3.example.com/reasoning-image.png' },
+               { type: 'text', text: 'Based on the analysis...' },
+             ]),
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         reasoning: {
+           content:
+             'Let me analyze this image:\n[Image: https://s3.example.com/reasoning-image.png]\nBased on the analysis...',
+           isMultimodal: false,
+         },
+         content: 'The answer is correct.',
+       });
+     });
+
+     it('should handle both reasoning.isMultimodal and metadata.isMultimodal', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: JSON.stringify([
+             { type: 'text', text: 'Final result:' },
+             { type: 'image', image: 'https://s3.example.com/result.png' },
+           ]),
+           metadata: {
+             isMultimodal: true,
+           },
+           reasoning: {
+             content: JSON.stringify([
+               { type: 'text', text: 'Thinking about:' },
+               { type: 'image', image: 'https://s3.example.com/thinking.png' },
+             ]),
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         reasoning: {
+           content: 'Thinking about:\n[Image: https://s3.example.com/thinking.png]',
+           isMultimodal: false,
+         },
+         content: [
+           { type: 'text', text: 'Final result:' },
+           {
+             type: 'image_url',
+             image_url: { detail: 'auto', url: 'https://s3.example.com/result.png' },
+           },
+         ],
+       });
+     });
+
+     it('should prioritize reasoning.signature over reasoning.isMultimodal', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: 'The answer.',
+           reasoning: {
+             content: 'Some thinking process',
+             signature: 'sig123',
+             // Even if isMultimodal is true, signature takes priority
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: [
+           {
+             type: 'thinking',
+             thinking: 'Some thinking process',
+             signature: 'sig123',
+           },
+           { type: 'text', text: 'The answer.' },
+         ],
+       });
+     });
+
+     it('should handle plain text when isMultimodal is true but content is not valid JSON', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: 'This is plain text, not JSON',
+           metadata: {
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: 'This is plain text, not JSON',
+       });
+     });
+
+     it('should preserve thoughtSignature in multimodal content parts', async () => {
+       const processor = new MessageContentProcessor({
+         model: 'gpt-4',
+         provider: 'openai',
+         isCanUseVision: mockIsCanUseVision,
+         fileContext: { enabled: false },
+       });
+
+       const messages: UIChatMessage[] = [
+         {
+           id: 'test',
+           role: 'assistant',
+           content: JSON.stringify([
+             { type: 'text', text: 'Analysis result:', thoughtSignature: 'sig-001' },
+             { type: 'image', image: 'https://s3.example.com/chart.png', thoughtSignature: 'sig-002' },
+             { type: 'text', text: 'Conclusion' },
+           ]),
+           metadata: {
+             isMultimodal: true,
+           },
+           createdAt: Date.now(),
+           updatedAt: Date.now(),
+           meta: {},
+         },
+       ];
+
+       const result = await processor.process(createContext(messages));
+
+       expect(result.messages[0]).toMatchObject({
+         content: [
+           { type: 'text', text: 'Analysis result:', googleThoughtSignature: 'sig-001' },
+           {
+             type: 'image_url',
+             image_url: { detail: 'auto', url: 'https://s3.example.com/chart.png' },
+             googleThoughtSignature: 'sig-002',
+           },
+           { type: 'text', text: 'Conclusion' },
+         ],
+       });
+     });
+   });
  });
package/packages/fetch-sse/src/fetchSSE.ts CHANGED
@@ -71,6 +71,22 @@ export interface MessageGroundingChunk {
    type: 'grounding';
  }
  
+ export interface MessageReasoningPartChunk {
+   content: string;
+   mimeType?: string;
+   partType: 'text' | 'image';
+   thoughtSignature?: string;
+   type: 'reasoning_part';
+ }
+
+ export interface MessageContentPartChunk {
+   content: string;
+   mimeType?: string;
+   partType: 'text' | 'image';
+   thoughtSignature?: string;
+   type: 'content_part';
+ }
+
  interface MessageToolCallsChunk {
    isAnimationActives?: boolean[];
    tool_calls: MessageToolCall[];
@@ -87,6 +103,8 @@ export interface FetchSSEOptions {
      | MessageTextChunk
      | MessageToolCallsChunk
      | MessageReasoningChunk
+     | MessageReasoningPartChunk
+     | MessageContentPartChunk
      | MessageGroundingChunk
      | MessageUsageChunk
      | MessageBase64ImageChunk
@@ -420,6 +438,18 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
          break;
        }
  
+       case 'reasoning_part':
+       case 'content_part': {
+         options.onMessageHandle?.({
+           content: data.content,
+           mimeType: data.mimeType,
+           partType: data.partType,
+           thoughtSignature: data.thoughtSignature,
+           type: ev.event,
+         });
+         break;
+       }
+
        case 'tool_calls': {
          // get finial
          // if there is no tool calls, we should initialize the tool calls
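
For context, here is a minimal sketch of how a caller of `fetchSSE` might consume the new `reasoning_part` / `content_part` events through an `onMessageHandle`-style callback. The chunk shape mirrors the `MessageReasoningPartChunk` / `MessageContentPartChunk` interfaces added above; the buffering logic and the assumption about what `content` holds for image parts are illustrative only and are not the package's actual handler.

```ts
// Simplified local copy of the chunk shapes added to fetchSSE.ts above.
interface PartChunk {
  content: string;
  mimeType?: string;
  partType: 'text' | 'image';
  thoughtSignature?: string;
  type: 'reasoning_part' | 'content_part';
}

// Illustrative consumer: accumulate streamed parts into separate reasoning / content buffers.
const reasoningParts: PartChunk[] = [];
const contentParts: PartChunk[] = [];

const onMessageHandle = (chunk: PartChunk) => {
  const target = chunk.type === 'reasoning_part' ? reasoningParts : contentParts;
  target.push(chunk);

  if (chunk.partType === 'image') {
    // Assumption: for image parts, `content` carries the image payload (a URL or inline data)
    // and `mimeType` describes it; this diff alone does not confirm the exact payload format.
    console.log('image part', chunk.mimeType, chunk.content.slice(0, 40));
  } else {
    console.log('text part', chunk.content);
  }
};

// Example: the kind of chunk the new `reasoning_part` / `content_part` SSE cases forward.
onMessageHandle({
  content: 'Analyzing the chart...',
  partType: 'text',
  thoughtSignature: 'sig-001',
  type: 'reasoning_part',
});
```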