npm - @lobehub/chat - Versions diffs - 1.131.4 → 1.132.0 - Mend

@lobehub/chat 1.131.4 → 1.132.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 # Changelog
+## [Version 1.132.0](https://github.com/lobehub/lobe-chat/compare/v1.131.4...v1.132.0)
+<sup>Released on **2025-09-21**</sup>
+#### ✨ Features
+- **misc**: Support google video understanding.
+<br/>
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+#### What's improved
+- **misc**: Support google video understanding, closes [#8761](https://github.com/lobehub/lobe-chat/issues/8761) ([f02d43b](https://github.com/lobehub/lobe-chat/commit/f02d43b))
+</details>
+<div align="right">
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+</div>
 ### [Version 1.131.4](https://github.com/lobehub/lobe-chat/compare/v1.131.3...v1.131.4)
 <sup>Released on **2025-09-21**</sup>

package/changelog/v1.json CHANGED Viewed

@@ -1,4 +1,13 @@
 [
+  {
+    "children": {
+      "features": [
+        "Support google video understanding."
+      ]
+    },
+    "date": "2025-09-21",
+    "version": "1.132.0"
+  },
   {
     "children": {
       "improvements": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lobehub/chat",
-  "version": "1.131.4",
+  "version": "1.132.0",
   "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
   "keywords": [
     "framework",

package/packages/context-engine/src/processors/MessageContent.ts CHANGED Viewed

@@ -17,6 +17,8 @@ export interface FileContextConfig {
 export interface MessageContentConfig {
   /** File context configuration */
   fileContext?: FileContextConfig;
+  /** Function to check if video is supported */
+  isCanUseVideo?: (model: string, provider: string) => boolean | undefined;
   /** Function to check if vision is supported */
   isCanUseVision?: (model: string, provider: string) => boolean | undefined;
   /** Model name */
@@ -33,7 +35,10 @@ export interface UserMessageContentPart {
   signature?: string;
   text?: string;
   thinking?: string;
-  type: 'text' | 'image_url' | 'thinking';
+  type: 'text' | 'image_url' | 'thinking' | 'video_url';
+  video_url?: {
+    url: string;
+  };
 }
 /**
@@ -104,12 +109,13 @@ export class MessageContentProcessor extends BaseProcessor {
    * Process user message content
    */
   private async processUserMessage(message: any): Promise<any> {
-    // Check if images or files need processing
+    // Check if images, videos or files need processing
     const hasImages = message.imageList && message.imageList.length > 0;
+    const hasVideos = message.videoList && message.videoList.length > 0;
     const hasFiles = message.fileList && message.fileList.length > 0;
-    // If no images and files, return plain text content directly
-    if (!hasImages && !hasFiles) {
+    // If no images, videos and files, return plain text content directly
+    if (!hasImages && !hasVideos && !hasFiles) {
       return {
         ...message,
         content: message.content,
@@ -121,12 +127,13 @@ export class MessageContentProcessor extends BaseProcessor {
     // Add text content
     let textContent = message.content || '';
-    // Add file context (if file context is enabled and has files or images)
-    if ((hasFiles || hasImages) && this.config.fileContext?.enabled) {
+    // Add file context (if file context is enabled and has files, images or videos)
+    if ((hasFiles || hasImages || hasVideos) && this.config.fileContext?.enabled) {
       const filesContext = filesPrompts({
         addUrl: this.config.fileContext.includeFileUrl ?? true,
         fileList: message.fileList,
-        imageList: message.imageList,
+        imageList: message.imageList || [],
+        videoList: message.videoList || [],
       });
       if (filesContext) {
@@ -148,17 +155,26 @@ export class MessageContentProcessor extends BaseProcessor {
       contentParts.push(...imageContentParts);
     }
+    // Process video content
+    if (hasVideos && this.config.isCanUseVideo?.(this.config.model, this.config.provider)) {
+      const videoContentParts = await this.processVideoList(message.videoList || []);
+      contentParts.push(...videoContentParts);
+    }
     // 明确返回的字段，只保留必要的消息字段
-    const hasFileContext = (hasFiles || hasImages) && this.config.fileContext?.enabled;
+    const hasFileContext = (hasFiles || hasImages || hasVideos) && this.config.fileContext?.enabled;
     const hasVisionContent =
       hasImages && this.config.isCanUseVision?.(this.config.model, this.config.provider);
+    const hasVideoContent =
+      hasVideos && this.config.isCanUseVideo?.(this.config.model, this.config.provider);
-    // 如果只有文本内容且没有添加文件上下文也没有视觉内容，返回纯文本
+    // 如果只有文本内容且没有添加文件上下文也没有视觉/视频内容，返回纯文本
     if (
       contentParts.length === 1 &&
       contentParts[0].type === 'text' &&
       !hasFileContext &&
-      !hasVisionContent
+      !hasVisionContent &&
+      !hasVideoContent
     ) {
       return {
         content: contentParts[0].text,
@@ -274,6 +290,22 @@ export class MessageContentProcessor extends BaseProcessor {
     );
   }
+  /**
+   * 处理视频列表
+   */
+  private async processVideoList(videoList: any[]): Promise<UserMessageContentPart[]> {
+    if (!videoList || videoList.length === 0) {
+      return [];
+    }
+    return videoList.map((video) => {
+      return {
+        type: 'video_url',
+        video_url: { url: video.url },
+      } as UserMessageContentPart;
+    });
+  }
   /**
    * 验证内容部分格式
    */
@@ -290,6 +322,9 @@ export class MessageContentProcessor extends BaseProcessor {
       case 'thinking': {
         return !!(part.thinking && part.signature);
       }
+      case 'video_url': {
+        return !!(part.video_url && part.video_url.url);
+      }
       default: {
         return false;
       }

package/packages/context-engine/src/processors/__tests__/MessageContent.test.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ChatImageItem, ChatMessage } from '@lobechat/types';
+import { ChatImageItem, ChatMessage, ChatVideoItem } from '@lobechat/types';
 import { describe, expect, it, vi } from 'vitest';
 import type { PipelineContext } from '../../types';
@@ -26,6 +26,7 @@ const createContext = (messages: ChatMessage[]): PipelineContext => ({
 });
 const mockIsCanUseVision = vi.fn();
+const mockIsCanUseVideo = vi.fn();
 describe('MessageContentProcessor', () => {
   describe('Image processing functionality', () => {
@@ -391,4 +392,181 @@ describe('MessageContentProcessor', () => {
       expect(result.metadata.assistantMessagesProcessed).toBe(1);
     });
   });
+  describe('Video processing functionality', () => {
+    it('should return empty video content parts if model cannot use video', async () => {
+      mockIsCanUseVideo.mockReturnValue(false);
+      const processor = new MessageContentProcessor({
+        model: 'any-model',
+        provider: 'any-provider',
+        isCanUseVideo: mockIsCanUseVideo,
+        fileContext: { enabled: false },
+      });
+      const messages: ChatMessage[] = [
+        {
+          id: 'test',
+          role: 'user',
+          content: 'Hello',
+          videoList: [{ url: 'video_url', alt: 'test video', id: 'test' } as ChatVideoItem],
+          createdAt: Date.now(),
+          updatedAt: Date.now(),
+          meta: {},
+        },
+      ];
+      const result = await processor.process(createContext(messages));
+      // Should return plain text when video is not supported
+      expect(result.messages[0].content).toBe('Hello');
+    });
+    it('should process videos if model can use video', async () => {
+      mockIsCanUseVideo.mockReturnValue(true);
+      const processor = new MessageContentProcessor({
+        model: 'gpt-4-vision',
+        provider: 'openai',
+        isCanUseVideo: mockIsCanUseVideo,
+        fileContext: { enabled: false },
+      });
+      const messages: ChatMessage[] = [
+        {
+          id: 'test',
+          role: 'user',
+          content: 'Hello',
+          videoList: [
+            { url: 'http://example.com/video.mp4', alt: 'test video', id: 'test1' },
+            { url: 'http://example.com/video2.mp4', alt: 'test video 2', id: 'test2' },
+          ] as ChatVideoItem[],
+          createdAt: Date.now(),
+          updatedAt: Date.now(),
+          meta: {},
+        },
+      ];
+      const result = await processor.process(createContext(messages));
+      const content = result.messages[0].content as any[];
+      expect(content).toHaveLength(3); // text + 2 videos
+      expect(content[0].type).toBe('text');
+      expect(content[0].text).toBe('Hello');
+      expect(content[1].type).toBe('video_url');
+      expect(content[1].video_url.url).toBe('http://example.com/video.mp4');
+      expect(content[2].type).toBe('video_url');
+      expect(content[2].video_url.url).toBe('http://example.com/video2.mp4');
+    });
+    it('should handle video disabled scenario correctly', async () => {
+      mockIsCanUseVideo.mockReturnValue(false);
+      const processor = new MessageContentProcessor({
+        model: 'text-model',
+        provider: 'openai',
+        isCanUseVideo: mockIsCanUseVideo,
+        fileContext: { enabled: false },
+      });
+      const messages: ChatMessage[] = [
+        {
+          id: 'test',
+          role: 'user',
+          content: 'Analyze this video',
+          videoList: [
+            { url: 'http://example.com/video.mp4', alt: 'test video', id: 'test' },
+          ] as ChatVideoItem[],
+          createdAt: Date.now(),
+          updatedAt: Date.now(),
+          meta: {},
+        },
+      ];
+      const result = await processor.process(createContext(messages));
+      // Should return plain text only when video not supported
+      expect(result.messages[0].content).toBe('Analyze this video');
+    });
+    it('should include videos in file context when enabled', async () => {
+      mockIsCanUseVideo.mockReturnValue(false); // Video processing disabled but file context enabled
+      const processor = new MessageContentProcessor({
+        model: 'gpt-4',
+        provider: 'openai',
+        isCanUseVideo: mockIsCanUseVideo,
+        fileContext: { enabled: true, includeFileUrl: true },
+      });
+      const messages: ChatMessage[] = [
+        {
+          id: 'test',
+          role: 'user',
+          content: 'Hello',
+          videoList: [
+            {
+              id: 'video1',
+              url: 'http://example.com/video.mp4',
+              alt: 'Test video',
+            },
+          ] as ChatVideoItem[],
+          createdAt: Date.now(),
+          updatedAt: Date.now(),
+          meta: {},
+        },
+      ];
+      const result = await processor.process(createContext(messages));
+      // Should return structured content when has videos and file context enabled
+      expect(Array.isArray(result.messages[0].content)).toBe(true);
+      const content = result.messages[0].content as any[];
+      expect(content).toHaveLength(1);
+      expect(content[0].type).toBe('text');
+      expect(content[0].text).toContain('SYSTEM CONTEXT');
+      expect(content[0].text).toContain('Hello');
+    });
+    it('should handle mixed images and videos correctly', async () => {
+      mockIsCanUseVision.mockReturnValue(true);
+      mockIsCanUseVideo.mockReturnValue(true);
+      const processor = new MessageContentProcessor({
+        model: 'gpt-4-vision',
+        provider: 'openai',
+        isCanUseVideo: mockIsCanUseVideo,
+        isCanUseVision: mockIsCanUseVision,
+        fileContext: { enabled: false },
+      });
+      const messages: ChatMessage[] = [
+        {
+          id: 'test',
+          role: 'user',
+          content: 'Analyze these media files',
+          imageList: [
+            { url: 'http://example.com/image.jpg', alt: 'test image', id: 'img1' },
+          ] as ChatImageItem[],
+          videoList: [
+            { url: 'http://example.com/video.mp4', alt: 'test video', id: 'vid1' },
+          ] as ChatVideoItem[],
+          createdAt: Date.now(),
+          updatedAt: Date.now(),
+          meta: {},
+        },
+      ];
+      const result = await processor.process(createContext(messages));
+      const content = result.messages[0].content as any[];
+      expect(content).toHaveLength(3); // text + image + video
+      expect(content[0].type).toBe('text');
+      expect(content[0].text).toBe('Analyze these media files');
+      expect(content[1].type).toBe('image_url');
+      expect(content[1].image_url.url).toBe('http://example.com/image.jpg');
+      expect(content[2].type).toBe('video_url');
+      expect(content[2].video_url.url).toBe('http://example.com/video.mp4');
+    });
+  });
 });

package/packages/database/src/models/message.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import {
   ChatTTS,
   ChatToolPayload,
   ChatTranslate,
+  ChatVideoItem,
   CreateMessageParams,
   MessageItem,
   ModelRankItem,
@@ -175,7 +176,10 @@ export class MessageModel {
     }
     const imageList = relatedFileList.filter((i) => (i.fileType || '').startsWith('image'));
-    const fileList = relatedFileList.filter((i) => !(i.fileType || '').startsWith('image'));
+    const videoList = relatedFileList.filter((i) => (i.fileType || '').startsWith('video'));
+    const fileList = relatedFileList.filter(
+      (i) => !(i.fileType || '').startsWith('image') && !(i.fileType || '').startsWith('video'),
+    );
     // 3. get relative file chunks
     const chunksList = await this.db
@@ -251,6 +255,10 @@ export class MessageModel {
           ragQuery: messageQuery?.rewriteQuery,
           ragQueryId: messageQuery?.id,
           ragRawQuery: messageQuery?.userQuery,
+          videoList: videoList
+            .filter((relation) => relation.messageId === item.id)
+            // eslint-disable-next-line @typescript-eslint/no-unused-vars
+            .map<ChatVideoItem>(({ id, url, name }) => ({ alt: name!, id, url })),
         } as unknown as ChatMessage;
       },
     );

package/packages/model-bank/src/aiModels/google.ts CHANGED Viewed

@@ -7,6 +7,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,
@@ -60,6 +61,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,
@@ -112,6 +114,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,
@@ -163,6 +166,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,
@@ -191,6 +195,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,
@@ -240,6 +245,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,
@@ -267,6 +273,7 @@ const googleChatModels: AIChatModelCard[] = [
       functionCall: true,
       reasoning: true,
       search: true,
+      video: true,
       vision: true,
     },
     contextWindowTokens: 1_048_576 + 65_536,

package/packages/model-runtime/src/providers/google/index.ts CHANGED Viewed

@@ -439,10 +439,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
           }
           return {
-            inlineData: {
-              data: base64,
-              mimeType: mimeType || 'image/png',
-            },
+            inlineData: { data: base64, mimeType: mimeType || 'image/png' },
           };
         }
@@ -450,15 +447,41 @@ export class LobeGoogleAI implements LobeRuntimeAI {
           const { base64, mimeType } = await imageUrlToBase64(content.image_url.url);
           return {
-            inlineData: {
-              data: base64,
-              mimeType,
-            },
+            inlineData: { data: base64, mimeType },
           };
         }
         throw new TypeError(`currently we don't support image url: ${content.image_url.url}`);
       }
+      case 'video_url': {
+        const { mimeType, base64, type } = parseDataUri(content.video_url.url);
+        if (type === 'base64') {
+          if (!base64) {
+            throw new TypeError("Video URL doesn't contain base64 data");
+          }
+          return {
+            inlineData: { data: base64, mimeType: mimeType || 'video/mp4' },
+          };
+        }
+        if (type === 'url') {
+          // For video URLs, we need to fetch and convert to base64
+          // Note: This might need size/duration limits for practical use
+          const response = await fetch(content.video_url.url);
+          const arrayBuffer = await response.arrayBuffer();
+          const base64 = Buffer.from(arrayBuffer).toString('base64');
+          const mimeType = response.headers.get('content-type') || 'video/mp4';
+          return {
+            inlineData: { data: base64, mimeType },
+          };
+        }
+        throw new TypeError(`currently we don't support video url: ${content.video_url.url}`);
+      }
     }
   };

package/packages/model-runtime/src/types/chat.ts CHANGED Viewed

@@ -21,9 +21,15 @@ interface UserMessageContentPartImage {
   type: 'image_url';
 }
+interface UserMessageContentPartVideo {
+  type: 'video_url';
+  video_url: { url: string };
+}
 export type UserMessageContentPart =
   | UserMessageContentPartText
   | UserMessageContentPartImage
+  | UserMessageContentPartVideo
   | UserMessageContentPartThinking;
 export interface OpenAIChatMessage {