@lobehub/chat 0.145.0 → 0.145.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 0.145.1](https://github.com/lobehub/lobe-chat/compare/v0.145.0...v0.145.1)
6
+
7
+ <sup>Released on **2024-03-29**</sup>
8
+
9
+ #### 🐛 Bug Fixes
10
+
11
+ - **misc**: Fix Google Gemini 1.5 Pro and system role not taking effect.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### What's fixed
19
+
20
+ - **misc**: Fix Google Gemini 1.5 Pro and system role not taking effect, closes [#1801](https://github.com/lobehub/lobe-chat/issues/1801) ([0a3e3f7](https://github.com/lobehub/lobe-chat/commit/0a3e3f7))
21
+
22
+ </details>
23
+
24
+ <div align="right">
25
+
26
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
27
+
28
+ </div>
29
+
5
30
  ## [Version 0.145.0](https://github.com/lobehub/lobe-chat/compare/v0.144.1...v0.145.0)
6
31
 
7
32
  <sup>Released on **2024-03-29**</sup>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "0.145.0",
3
+ "version": "0.145.1",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -84,7 +84,7 @@
84
84
  "@aws-sdk/client-bedrock-runtime": "^3.525.0",
85
85
  "@azure/openai": "^1.0.0-beta.11",
86
86
  "@cfworker/json-schema": "^1",
87
- "@google/generative-ai": "^0.2.0",
87
+ "@google/generative-ai": "^0.3.1",
88
88
  "@icons-pack/react-simple-icons": "^9",
89
89
  "@lobehub/chat-plugin-sdk": "latest",
90
90
  "@lobehub/chat-plugins-gateway": "latest",
@@ -13,19 +13,7 @@ import { POST as UniverseRoute } from '../[provider]/route';
13
13
  // so if you want to use with proxy, you need comment the code below
14
14
  export const runtime = 'edge';
15
15
 
16
- export const preferredRegion = [
17
- 'bom1',
18
- 'cle1',
19
- 'cpt1',
20
- 'gru1',
21
- 'hnd1',
22
- 'iad1',
23
- 'icn1',
24
- 'kix1',
25
- 'pdx1',
26
- 'sfo1',
27
- 'sin1',
28
- 'syd1',
29
- ];
16
+ // gemini-1.5-pro is only available in the US, so restrict the preferred regions to US ones
17
+ export const preferredRegion = ['cle1', 'iad1', 'pdx1', 'sfo1'];
30
18
 
31
19
  export const POST = async (req: Request) => UniverseRoute(req, { params: { provider: 'google' } });
@@ -3,25 +3,86 @@ import { ModelProviderCard } from '@/types/llm';
3
3
  const Google: ModelProviderCard = {
4
4
  chatModels: [
5
5
  {
6
- displayName: 'Gemini Pro',
6
+ description: 'A legacy text-only model optimized for chat conversations',
7
+ displayName: 'PaLM 2 Chat (Legacy)',
8
+ hidden: true,
9
+ id: 'chat-bison-001',
10
+ maxOutput: 1024,
11
+ tokens: 5120,
12
+ },
13
+ {
14
+ description: 'A legacy model that understands text and generates text as an output',
15
+ displayName: 'PaLM 2 (Legacy)',
16
+ hidden: true,
17
+ id: 'text-bison-001',
18
+ maxOutput: 1024,
19
+ tokens: 9220,
20
+ },
21
+ {
22
+ description: 'The best model for scaling across a wide range of tasks',
23
+ displayName: 'Gemini 1.0 Pro',
7
24
  id: 'gemini-pro',
8
- tokens: 30_720,
25
+ maxOutput: 2048,
26
+ tokens: 32_768,
27
+ },
28
+ {
29
+ description: 'The best image understanding model to handle a broad range of applications',
30
+ displayName: 'Gemini 1.0 Pro Vision',
31
+ id: 'gemini-1.0-pro-vision-latest',
32
+ maxOutput: 4096,
33
+ tokens: 16_384,
34
+ vision: true,
9
35
  },
10
36
  {
11
- displayName: 'Gemini Pro Vision',
37
+ description: 'The best image understanding model to handle a broad range of applications',
38
+ displayName: 'Gemini 1.0 Pro Vision',
39
+ hidden: true,
12
40
  id: 'gemini-pro-vision',
13
- tokens: 12_288,
41
+ maxOutput: 4096,
42
+ tokens: 16_384,
14
43
  vision: true,
15
44
  },
16
45
  {
46
+ description: 'The best model for scaling across a wide range of tasks',
47
+ displayName: 'Gemini 1.0 Pro',
48
+ hidden: true,
49
+ id: '1.0-pro',
50
+ maxOutput: 2048,
51
+ tokens: 32_768,
52
+ },
53
+ {
54
+ description:
55
+ 'The best model for scaling across a wide range of tasks. This is a stable model that supports tuning.',
56
+ displayName: 'Gemini 1.0 Pro 001 (Tuning)',
57
+ hidden: true,
58
+ id: 'gemini-1.0-pro-001',
59
+ maxOutput: 2048,
60
+ tokens: 32_768,
61
+ },
62
+ {
63
+ description:
64
+ 'The best model for scaling across a wide range of tasks. This is the latest model.',
65
+ displayName: 'Gemini 1.0 Pro Latest',
66
+ hidden: true,
67
+ id: 'gemini-1.0-pro-latest',
68
+ maxOutput: 2048,
69
+ tokens: 32_768,
70
+ },
71
+ {
72
+ description: 'Mid-size multimodal model that supports up to 1 million tokens',
17
73
  displayName: 'Gemini 1.5 Pro',
18
74
  id: 'gemini-1.5-pro-latest',
19
- tokens: 1_048_576,
75
+ maxOutput: 8192,
76
+ tokens: 1_056_768,
77
+ vision: true,
20
78
  },
21
79
  {
22
- displayName: 'Gemini Ultra',
80
+ description: 'The most capable model for highly complex tasks',
81
+ displayName: 'Gemini 1.0 Ultra',
82
+ hidden: true,
23
83
  id: 'gemini-ultra-latest',
24
- tokens: 30_720,
84
+ maxOutput: 2048,
85
+ tokens: 32_768,
25
86
  },
26
87
  ],
27
88
  id: 'google',
@@ -1,5 +1,4 @@
1
1
  // @vitest-environment edge-runtime
2
- import { GenerateContentRequest, GenerateContentStreamResult, Part } from '@google/generative-ai';
3
2
  import OpenAI from 'openai';
4
3
  import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
5
4
 
@@ -317,17 +316,55 @@ describe('LobeGoogleAI', () => {
317
316
  });
318
317
 
319
318
  describe('buildGoogleMessages', () => {
320
- it('should use default text model when no images are included in messages', () => {
319
+ it('get default result with gemini-pro', () => {
320
+ const messages: OpenAIChatMessage[] = [{ content: 'Hello', role: 'user' }];
321
+
322
+ const contents = instance['buildGoogleMessages'](messages, 'gemini-pro');
323
+
324
+ expect(contents).toHaveLength(1);
325
+ expect(contents).toEqual([{ parts: [{ text: 'Hello' }], role: 'user' }]);
326
+ });
327
+
328
+ it('messages should end with user if using gemini-pro', () => {
321
329
  const messages: OpenAIChatMessage[] = [
322
330
  { content: 'Hello', role: 'user' },
323
331
  { content: 'Hi', role: 'assistant' },
324
332
  ];
325
- const model = 'text-davinci-003';
326
333
 
327
- // 调用 buildGoogleMessages 方法
328
- const { contents, model: usedModel } = instance['buildGoogleMessages'](messages, model);
334
+ const contents = instance['buildGoogleMessages'](messages, 'gemini-pro');
335
+
336
+ expect(contents).toHaveLength(3);
337
+ expect(contents).toEqual([
338
+ { parts: [{ text: 'Hello' }], role: 'user' },
339
+ { parts: [{ text: 'Hi' }], role: 'model' },
340
+ { parts: [{ text: '' }], role: 'user' },
341
+ ]);
342
+ });
343
+
344
+ it('should include system role if there is a system role prompt', () => {
345
+ const messages: OpenAIChatMessage[] = [
346
+ { content: 'you are ChatGPT', role: 'system' },
347
+ { content: 'Who are you', role: 'user' },
348
+ ];
349
+
350
+ const contents = instance['buildGoogleMessages'](messages, 'gemini-pro');
351
+
352
+ expect(contents).toHaveLength(3);
353
+ expect(contents).toEqual([
354
+ { parts: [{ text: 'you are ChatGPT' }], role: 'user' },
355
+ { parts: [{ text: '' }], role: 'model' },
356
+ { parts: [{ text: 'Who are you' }], role: 'user' },
357
+ ]);
358
+ });
359
+
360
+ it('should not modify the length if model is gemini-1.5-pro', () => {
361
+ const messages: OpenAIChatMessage[] = [
362
+ { content: 'Hello', role: 'user' },
363
+ { content: 'Hi', role: 'assistant' },
364
+ ];
365
+
366
+ const contents = instance['buildGoogleMessages'](messages, 'gemini-1.5-pro-latest');
329
367
 
330
- expect(usedModel).toEqual('gemini-pro'); // 假设 'gemini-pro' 是默认文本模型
331
368
  expect(contents).toHaveLength(2);
332
369
  expect(contents).toEqual([
333
370
  { parts: [{ text: 'Hello' }], role: 'user' },
@@ -348,9 +385,8 @@ describe('LobeGoogleAI', () => {
348
385
  const model = 'gemini-pro-vision';
349
386
 
350
387
  // 调用 buildGoogleMessages 方法
351
- const { contents, model: usedModel } = instance['buildGoogleMessages'](messages, model);
388
+ const contents = instance['buildGoogleMessages'](messages, model);
352
389
 
353
- expect(usedModel).toEqual(model);
354
390
  expect(contents).toHaveLength(1);
355
391
  expect(contents).toEqual([
356
392
  {
@@ -360,5 +396,35 @@ describe('LobeGoogleAI', () => {
360
396
  ]);
361
397
  });
362
398
  });
399
+
400
+ describe('convertModel', () => {
401
+ it('should use default text model when no images are included in messages', () => {
402
+ const messages: OpenAIChatMessage[] = [
403
+ { content: 'Hello', role: 'user' },
404
+ { content: 'Hi', role: 'assistant' },
405
+ ];
406
+
407
+ // call the convertModel method
408
+ const model = instance['convertModel']('gemini-pro-vision', messages);
409
+
410
+ expect(model).toEqual('gemini-pro'); // 假设 'gemini-pro' 是默认文本模型
411
+ });
412
+
413
+ it('should use specified model when images are included in messages', () => {
414
+ const messages: OpenAIChatMessage[] = [
415
+ {
416
+ content: [
417
+ { type: 'text', text: 'Hello' },
418
+ { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } },
419
+ ],
420
+ role: 'user',
421
+ },
422
+ ];
423
+
424
+ const model = instance['convertModel']('gemini-pro-vision', messages);
425
+
426
+ expect(model).toEqual('gemini-pro-vision');
427
+ });
428
+ });
363
429
  });
364
430
  });
@@ -14,17 +14,6 @@ import { AgentRuntimeError } from '../utils/createError';
14
14
  import { debugStream } from '../utils/debugStream';
15
15
  import { parseDataUri } from '../utils/uriParser';
16
16
 
17
- type GoogleChatErrors = GoogleChatError[];
18
-
19
- interface GoogleChatError {
20
- '@type': string;
21
- 'domain': string;
22
- 'metadata': {
23
- service: string;
24
- };
25
- 'reason': string;
26
- }
27
-
28
17
  enum HarmCategory {
29
18
  HARM_CATEGORY_DANGEROUS_CONTENT = 'HARM_CATEGORY_DANGEROUS_CONTENT',
30
19
  HARM_CATEGORY_HARASSMENT = 'HARM_CATEGORY_HARASSMENT',
@@ -47,34 +36,42 @@ export class LobeGoogleAI implements LobeRuntimeAI {
47
36
 
48
37
  async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
49
38
  try {
50
- const { contents, model } = this.buildGoogleMessages(payload.messages, payload.model);
39
+ const model = this.convertModel(payload.model, payload.messages);
40
+
41
+ const contents = this.buildGoogleMessages(payload.messages, model);
42
+
51
43
  const geminiStream = await this.client
52
- .getGenerativeModel({
53
- generationConfig: {
54
- maxOutputTokens: payload.max_tokens,
55
- temperature: payload.temperature,
56
- topP: payload.top_p,
57
- },
58
- model,
59
- safetySettings: [
60
- {
61
- category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
62
- threshold: HarmBlockThreshold.BLOCK_NONE,
63
- },
64
- {
65
- category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
66
- threshold: HarmBlockThreshold.BLOCK_NONE,
44
+ .getGenerativeModel(
45
+ {
46
+ generationConfig: {
47
+ maxOutputTokens: payload.max_tokens,
48
+ temperature: payload.temperature,
49
+ topP: payload.top_p,
67
50
  },
68
- {
69
- category: HarmCategory.HARM_CATEGORY_HARASSMENT,
70
- threshold: HarmBlockThreshold.BLOCK_NONE,
71
- },
72
- {
73
- category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
74
- threshold: HarmBlockThreshold.BLOCK_NONE,
75
- },
76
- ],
77
- })
51
+ model,
52
+ // avoid wide sensitive words
53
+ // refs: https://github.com/lobehub/lobe-chat/pull/1418
54
+ safetySettings: [
55
+ {
56
+ category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
57
+ threshold: HarmBlockThreshold.BLOCK_NONE,
58
+ },
59
+ {
60
+ category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
61
+ threshold: HarmBlockThreshold.BLOCK_NONE,
62
+ },
63
+ {
64
+ category: HarmCategory.HARM_CATEGORY_HARASSMENT,
65
+ threshold: HarmBlockThreshold.BLOCK_NONE,
66
+ },
67
+ {
68
+ category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
69
+ threshold: HarmBlockThreshold.BLOCK_NONE,
70
+ },
71
+ ],
72
+ },
73
+ { apiVersion: 'v1beta' },
74
+ )
78
75
  .generateContentStream({ contents });
79
76
 
80
77
  // Convert the response into a friendly text-stream
@@ -127,25 +124,64 @@ export class LobeGoogleAI implements LobeRuntimeAI {
127
124
  typeof content === 'string'
128
125
  ? [{ text: content }]
129
126
  : content.map((c) => this.convertContentToGooglePart(c)),
130
- role: message.role === 'user' ? 'user' : 'model',
127
+ role: message.role === 'assistant' ? 'model' : 'user',
131
128
  };
132
129
  };
133
130
 
134
131
  // convert messages from the Vercel AI SDK Format to the format
135
132
  // that is expected by the Google GenAI SDK
136
- private buildGoogleMessages = (
137
- messages: OpenAIChatMessage[],
138
- model: string,
139
- ): { contents: Content[]; model: string } => {
140
- const contents = messages
141
- .filter((message) => message.role === 'user' || message.role === 'assistant')
142
- .map((msg) => this.convertOAIMessagesToGoogleMessage(msg));
143
-
144
- // if message are all text message, use vision will return error
145
- // use add an image to use models/gemini-pro-vision, or switch your model to a text model
146
- const noImage = messages.every((m) => typeof m.content === 'string');
147
-
148
- return { contents, model: noImage ? 'gemini-pro' : model };
133
+ private buildGoogleMessages = (messages: OpenAIChatMessage[], model: string): Content[] => {
134
+ // if the model is gemini-1.5-pro-latest, we don't need any special handling
135
+ if (model === 'gemini-1.5-pro-latest') {
136
+ return messages
137
+ .filter((message) => message.role !== 'function')
138
+ .map((msg) => this.convertOAIMessagesToGoogleMessage(msg));
139
+ }
140
+
141
+ const contents: Content[] = [];
142
+ let lastRole = 'model';
143
+
144
+ messages.forEach((message) => {
145
+ // function messages are currently filtered out
146
+ if (message.role === 'function') {
147
+ return;
148
+ }
149
+ const googleMessage = this.convertOAIMessagesToGoogleMessage(message);
150
+
151
+ // if the current message has the same role as the previous one,
152
+ // insert an empty message of the opposite role so that roles strictly alternate
153
+ if (lastRole === googleMessage.role) {
154
+ contents.push({ parts: [{ text: '' }], role: lastRole === 'user' ? 'model' : 'user' });
155
+ }
156
+
157
+ // add the current message to the contents
158
+ contents.push(googleMessage);
159
+
160
+ // update the last role
161
+ lastRole = googleMessage.role;
162
+ });
163
+
164
+ // if the last message is a model message, append an empty user message so the conversation ends with a user turn
165
+ if (lastRole === 'model') {
166
+ contents.push({ parts: [{ text: '' }], role: 'user' });
167
+ }
168
+
169
+ return contents;
170
+ };
171
+
172
+ private convertModel = (model: string, messages: OpenAIChatMessage[]) => {
173
+ let finalModel: string = model;
174
+
175
+ if (model.includes('pro-vision')) {
176
+ // if all messages are text-only, using a vision model returns an error:
177
+ // "[400 Bad Request] Add an image to use models/gemini-pro-vision, or switch your model to a text model."
178
+ const noNeedVision = messages.every((m) => typeof m.content === 'string');
179
+
180
+ // so we need to downgrade to gemini-pro
181
+ if (noNeedVision) finalModel = 'gemini-pro';
182
+ }
183
+
184
+ return finalModel;
149
185
  };
150
186
 
151
187
  private parseErrorMessage(message: string): {
@@ -191,3 +227,14 @@ export class LobeGoogleAI implements LobeRuntimeAI {
191
227
  }
192
228
 
193
229
  export default LobeGoogleAI;
230
+
231
+ type GoogleChatErrors = GoogleChatError[];
232
+
233
+ interface GoogleChatError {
234
+ '@type': string;
235
+ 'domain': string;
236
+ 'metadata': {
237
+ service: string;
238
+ };
239
+ 'reason': string;
240
+ }
package/src/types/llm.ts CHANGED
@@ -20,6 +20,9 @@ export interface ChatModelCard {
20
20
  */
21
21
  legacy?: boolean;
22
22
  maxOutput?: number;
23
+ /**
24
+ * the context window
25
+ */
23
26
  tokens?: number;
24
27
  /**
25
28
  * whether model supports vision