npm - @lobehub/chat - Versions diffs - 0.145.0 → 0.145.1 - Mend

@lobehub/chat 0.145.0 → 0.145.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/CHANGELOG.md +25 -0
package/package.json +2 -2
package/src/app/api/chat/google/route.ts +2 -14
package/src/config/modelProviders/google.ts +68 -7
package/src/libs/agent-runtime/google/index.test.ts +74 -8
package/src/libs/agent-runtime/google/index.ts +98 -51
package/src/types/llm.ts +3 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 # Changelog
+### [Version 0.145.1](https://github.com/lobehub/lobe-chat/compare/v0.145.0...v0.145.1)
+<sup>Released on **2024-03-29**</sup>
+#### 🐛 Bug Fixes
+- **misc**: Fix Google Gemini Pro 1.5 and system role not taking effect.
+<br/>
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+#### What's fixed
+- **misc**: Fix Google Gemini Pro 1.5 and system role not taking effect, closes [#1801](https://github.com/lobehub/lobe-chat/issues/1801) ([0a3e3f7](https://github.com/lobehub/lobe-chat/commit/0a3e3f7))
+</details>
+<div align="right">
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+</div>
 ## [Version 0.145.0](https://github.com/lobehub/lobe-chat/compare/v0.144.1...v0.145.0)
 <sup>Released on **2024-03-29**</sup>

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lobehub/chat",
-  "version": "0.145.0",
+  "version": "0.145.1",
   "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
   "keywords": [
     "framework",
@@ -84,7 +84,7 @@
     "@aws-sdk/client-bedrock-runtime": "^3.525.0",
     "@azure/openai": "^1.0.0-beta.11",
     "@cfworker/json-schema": "^1",
-    "@google/generative-ai": "^0.2.0",
+    "@google/generative-ai": "^0.3.1",
     "@icons-pack/react-simple-icons": "^9",
     "@lobehub/chat-plugin-sdk": "latest",
     "@lobehub/chat-plugins-gateway": "latest",

package/src/app/api/chat/google/route.ts CHANGED Viewed

@@ -13,19 +13,7 @@ import { POST as UniverseRoute } from '../[provider]/route';
 // so if you want to use with proxy, you need comment the code below
 export const runtime = 'edge';
-export const preferredRegion = [
-  'bom1',
-  'cle1',
-  'cpt1',
-  'gru1',
-  'hnd1',
-  'iad1',
-  'icn1',
-  'kix1',
-  'pdx1',
-  'sfo1',
-  'sin1',
-  'syd1',
-];
+// gemini-1.5-pro can only be used in the US, so the preferred regions are restricted to US regions
+export const preferredRegion = ['cle1', 'iad1', 'pdx1', 'sfo1'];
 export const POST = async (req: Request) => UniverseRoute(req, { params: { provider: 'google' } });

package/src/config/modelProviders/google.ts CHANGED Viewed

@@ -3,25 +3,86 @@ import { ModelProviderCard } from '@/types/llm';
 const Google: ModelProviderCard = {
   chatModels: [
     {
-      displayName: 'Gemini Pro',
+      description: 'A legacy text-only model optimized for chat conversations',
+      displayName: 'PaLM 2 Chat (Legacy)',
+      hidden: true,
+      id: 'chat-bison-001',
+      maxOutput: 1024,
+      tokens: 5120,
+    },
+    {
+      description: 'A legacy model that understands text and generates text as an output',
+      displayName: 'PaLM 2 (Legacy)',
+      hidden: true,
+      id: 'text-bison-001',
+      maxOutput: 1024,
+      tokens: 9220,
+    },
+    {
+      description: 'The best model for scaling across a wide range of tasks',
+      displayName: 'Gemini 1.0 Pro',
       id: 'gemini-pro',
-      tokens: 30_720,
+      maxOutput: 2048,
+      tokens: 32_768,
+    },
+    {
+      description: 'The best image understanding model to handle a broad range of applications',
+      displayName: 'Gemini 1.0 Pro Vision',
+      id: 'gemini-1.0-pro-vision-latest',
+      maxOutput: 4096,
+      tokens: 16_384,
+      vision: true,
     },
     {
-      displayName: 'Gemini Pro Vision',
+      description: 'The best image understanding model to handle a broad range of applications',
+      displayName: 'Gemini 1.0 Pro Vision',
+      hidden: true,
       id: 'gemini-pro-vision',
-      tokens: 12_288,
+      maxOutput: 4096,
+      tokens: 16_384,
       vision: true,
     },
     {
+      description: 'The best model for scaling across a wide range of tasks',
+      displayName: 'Gemini 1.0 Pro',
+      hidden: true,
+      id: '1.0-pro',
+      maxOutput: 2048,
+      tokens: 32_768,
+    },
+    {
+      description:
+        'The best model for scaling across a wide range of tasks. This is a stable model that supports tuning.',
+      displayName: 'Gemini 1.0 Pro 001 (Tuning)',
+      hidden: true,
+      id: 'gemini-1.0-pro-001',
+      maxOutput: 2048,
+      tokens: 32_768,
+    },
+    {
+      description:
+        'The best model for scaling across a wide range of tasks. This is the latest model.',
+      displayName: 'Gemini 1.0 Pro Latest',
+      hidden: true,
+      id: 'gemini-1.0-pro-latest',
+      maxOutput: 2048,
+      tokens: 32_768,
+    },
+    {
+      description: 'Mid-size multimodal model that supports up to 1 million tokens',
       displayName: 'Gemini 1.5 Pro',
       id: 'gemini-1.5-pro-latest',
-      tokens: 1_048_576,
+      maxOutput: 8192,
+      tokens: 1_056_768,
+      vision: true,
     },
     {
-      displayName: 'Gemini Ultra',
+      description: 'The most capable model for highly complex tasks',
+      displayName: 'Gemini 1.0 Ultra',
+      hidden: true,
       id: 'gemini-ultra-latest',
-      tokens: 30_720,
+      maxOutput: 2048,
+      tokens: 32_768,
     },
   ],
   id: 'google',

package/src/libs/agent-runtime/google/index.test.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 // @vitest-environment edge-runtime
-import { GenerateContentRequest, GenerateContentStreamResult, Part } from '@google/generative-ai';
 import OpenAI from 'openai';
 import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
@@ -317,17 +316,55 @@ describe('LobeGoogleAI', () => {
     });
     describe('buildGoogleMessages', () => {
-      it('should use default text model when no images are included in messages', () => {
+      it('get default result with gemini-pro', () => {
+        const messages: OpenAIChatMessage[] = [{ content: 'Hello', role: 'user' }];
+        const contents = instance['buildGoogleMessages'](messages, 'gemini-pro');
+        expect(contents).toHaveLength(1);
+        expect(contents).toEqual([{ parts: [{ text: 'Hello' }], role: 'user' }]);
+      });
+      it('messages should end with user if using gemini-pro', () => {
         const messages: OpenAIChatMessage[] = [
           { content: 'Hello', role: 'user' },
           { content: 'Hi', role: 'assistant' },
         ];
-        const model = 'text-davinci-003';
-        // 调用 buildGoogleMessages 方法
-        const { contents, model: usedModel } = instance['buildGoogleMessages'](messages, model);
+        const contents = instance['buildGoogleMessages'](messages, 'gemini-pro');
+        expect(contents).toHaveLength(3);
+        expect(contents).toEqual([
+          { parts: [{ text: 'Hello' }], role: 'user' },
+          { parts: [{ text: 'Hi' }], role: 'model' },
+          { parts: [{ text: '' }], role: 'user' },
+        ]);
+      });
+      it('should include system role if there is a system role prompt', () => {
+        const messages: OpenAIChatMessage[] = [
+          { content: 'you are ChatGPT', role: 'system' },
+          { content: 'Who are you', role: 'user' },
+        ];
+        const contents = instance['buildGoogleMessages'](messages, 'gemini-pro');
+        expect(contents).toHaveLength(3);
+        expect(contents).toEqual([
+          { parts: [{ text: 'you are ChatGPT' }], role: 'user' },
+          { parts: [{ text: '' }], role: 'model' },
+          { parts: [{ text: 'Who are you' }], role: 'user' },
+        ]);
+      });
+      it('should not modify the length if model is gemini-1.5-pro', () => {
+        const messages: OpenAIChatMessage[] = [
+          { content: 'Hello', role: 'user' },
+          { content: 'Hi', role: 'assistant' },
+        ];
+        const contents = instance['buildGoogleMessages'](messages, 'gemini-1.5-pro-latest');
-        expect(usedModel).toEqual('gemini-pro'); // 假设 'gemini-pro' 是默认文本模型
         expect(contents).toHaveLength(2);
         expect(contents).toEqual([
           { parts: [{ text: 'Hello' }], role: 'user' },
@@ -348,9 +385,8 @@ describe('LobeGoogleAI', () => {
         const model = 'gemini-pro-vision';
         // call the buildGoogleMessages method
-        const { contents, model: usedModel } = instance['buildGoogleMessages'](messages, model);
+        const contents = instance['buildGoogleMessages'](messages, model);
-        expect(usedModel).toEqual(model);
         expect(contents).toHaveLength(1);
         expect(contents).toEqual([
           {
@@ -360,5 +396,35 @@ describe('LobeGoogleAI', () => {
         ]);
       });
     });
+    describe('convertModel', () => {
+      it('should use default text model when no images are included in messages', () => {
+        const messages: OpenAIChatMessage[] = [
+          { content: 'Hello', role: 'user' },
+          { content: 'Hi', role: 'assistant' },
+        ];
+        // call the convertModel method
+        const model = instance['convertModel']('gemini-pro-vision', messages);
+        expect(model).toEqual('gemini-pro'); // 假设 'gemini-pro' 是默认文本模型
+      });
+      it('should use specified model when images are included in messages', () => {
+        const messages: OpenAIChatMessage[] = [
+          {
+            content: [
+              { type: 'text', text: 'Hello' },
+              { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } },
+            ],
+            role: 'user',
+          },
+        ];
+        const model = instance['convertModel']('gemini-pro-vision', messages);
+        expect(model).toEqual('gemini-pro-vision');
+      });
+    });
   });
 });

package/src/libs/agent-runtime/google/index.ts CHANGED Viewed

@@ -14,17 +14,6 @@ import { AgentRuntimeError } from '../utils/createError';
 import { debugStream } from '../utils/debugStream';
 import { parseDataUri } from '../utils/uriParser';
-type GoogleChatErrors = GoogleChatError[];
-interface GoogleChatError {
-  '@type': string;
-  'domain': string;
-  'metadata': {
-    service: string;
-  };
-  'reason': string;
-}
 enum HarmCategory {
   HARM_CATEGORY_DANGEROUS_CONTENT = 'HARM_CATEGORY_DANGEROUS_CONTENT',
   HARM_CATEGORY_HARASSMENT = 'HARM_CATEGORY_HARASSMENT',
@@ -47,34 +36,42 @@ export class LobeGoogleAI implements LobeRuntimeAI {
   async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
     try {
-      const { contents, model } = this.buildGoogleMessages(payload.messages, payload.model);
+      const model = this.convertModel(payload.model, payload.messages);
+      const contents = this.buildGoogleMessages(payload.messages, model);
       const geminiStream = await this.client
-        .getGenerativeModel({
-          generationConfig: {
-            maxOutputTokens: payload.max_tokens,
-            temperature: payload.temperature,
-            topP: payload.top_p,
-          },
-          model,
-          safetySettings: [
-            {
-              category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-              threshold: HarmBlockThreshold.BLOCK_NONE,
-            },
-            {
-              category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-              threshold: HarmBlockThreshold.BLOCK_NONE,
+        .getGenerativeModel(
+          {
+            generationConfig: {
+              maxOutputTokens: payload.max_tokens,
+              temperature: payload.temperature,
+              topP: payload.top_p,
             },
-            {
-              category: HarmCategory.HARM_CATEGORY_HARASSMENT,
-              threshold: HarmBlockThreshold.BLOCK_NONE,
-            },
-            {
-              category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-              threshold: HarmBlockThreshold.BLOCK_NONE,
-            },
-          ],
-        })
+            model,
+            // set every harm category to BLOCK_NONE to avoid overly broad sensitive-word blocking
+            // refs: https://github.com/lobehub/lobe-chat/pull/1418
+            safetySettings: [
+              {
+                category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                threshold: HarmBlockThreshold.BLOCK_NONE,
+              },
+              {
+                category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                threshold: HarmBlockThreshold.BLOCK_NONE,
+              },
+              {
+                category: HarmCategory.HARM_CATEGORY_HARASSMENT,
+                threshold: HarmBlockThreshold.BLOCK_NONE,
+              },
+              {
+                category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                threshold: HarmBlockThreshold.BLOCK_NONE,
+              },
+            ],
+          },
+          { apiVersion: 'v1beta' },
+        )
         .generateContentStream({ contents });
       // Convert the response into a friendly text-stream
@@ -127,25 +124,64 @@ export class LobeGoogleAI implements LobeRuntimeAI {
         typeof content === 'string'
           ? [{ text: content }]
           : content.map((c) => this.convertContentToGooglePart(c)),
-      role: message.role === 'user' ? 'user' : 'model',
+      role: message.role === 'assistant' ? 'model' : 'user',
     };
   };
   // convert messages from the Vercel AI SDK Format to the format
   // that is expected by the Google GenAI SDK
-  private buildGoogleMessages = (
-    messages: OpenAIChatMessage[],
-    model: string,
-  ): { contents: Content[]; model: string } => {
-    const contents = messages
-      .filter((message) => message.role === 'user' || message.role === 'assistant')
-      .map((msg) => this.convertOAIMessagesToGoogleMessage(msg));
-    // if message are all text message, use vision will return error
-    // use add an image to use models/gemini-pro-vision, or switch your model to a text model
-    const noImage = messages.every((m) => typeof m.content === 'string');
-    return { contents, model: noImage ? 'gemini-pro' : model };
+  private buildGoogleMessages = (messages: OpenAIChatMessage[], model: string): Content[] => {
+    // if the model is gemini-1.5-pro-latest, we don't need any special handling
+    if (model === 'gemini-1.5-pro-latest') {
+      return messages
+        .filter((message) => message.role !== 'function')
+        .map((msg) => this.convertOAIMessagesToGoogleMessage(msg));
+    }
+    const contents: Content[] = [];
+    let lastRole = 'model';
+    messages.forEach((message) => {
+      // function messages are currently filtered out
+      if (message.role === 'function') {
+        return;
+      }
+      const googleMessage = this.convertOAIMessagesToGoogleMessage(message);
+      // if the current message has the same role as the previous one,
+      // insert an empty message of the opposite role so that user and model turns alternate
+      if (lastRole === googleMessage.role) {
+        contents.push({ parts: [{ text: '' }], role: lastRole === 'user' ? 'model' : 'user' });
+      }
+      // add the current message to the contents
+      contents.push(googleMessage);
+      // update the last role
+      lastRole = googleMessage.role;
+    });
+    // if the last message is a model message, append an empty user message so the contents end with a user turn
+    if (lastRole === 'model') {
+      contents.push({ parts: [{ text: '' }], role: 'user' });
+    }
+    return contents;
+  };
+  private convertModel = (model: string, messages: OpenAIChatMessage[]) => {
+    let finalModel: string = model;
+    if (model.includes('pro-vision')) {
+      // if all messages are text-only, using a vision model will return an error:
+      // "[400 Bad Request] Add an image to use models/gemini-pro-vision, or switch your model to a text model."
+      const noNeedVision = messages.every((m) => typeof m.content === 'string');
+      // so we need to downgrade to gemini-pro
+      if (noNeedVision) finalModel = 'gemini-pro';
+    }
+    return finalModel;
   };
   private parseErrorMessage(message: string): {
@@ -191,3 +227,14 @@ export class LobeGoogleAI implements LobeRuntimeAI {
 }
 export default LobeGoogleAI;
+type GoogleChatErrors = GoogleChatError[];
+interface GoogleChatError {
+  '@type': string;
+  'domain': string;
+  'metadata': {
+    service: string;
+  };
+  'reason': string;
+}

package/src/types/llm.ts CHANGED Viewed

@@ -20,6 +20,9 @@ export interface ChatModelCard {
    */
   legacy?: boolean;
   maxOutput?: number;
+  /**
+   * the context window
+   */
   tokens?: number;
   /**
    *  whether model supports vision