@lobehub/chat 1.36.32 → 1.36.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 1.36.33](https://github.com/lobehub/lobe-chat/compare/v1.36.32...v1.36.33)
6
+
7
+ <sup>Released on **2024-12-18**</sup>
8
+
9
+ #### 🐛 Bug Fixes
10
+
11
+ - **misc**: Fix GitHub model fetch.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### What's fixed
19
+
20
+ - **misc**: Fix GitHub model fetch, closes [#4645](https://github.com/lobehub/lobe-chat/issues/4645) ([b69dce3](https://github.com/lobehub/lobe-chat/commit/b69dce3))
21
+
22
+ </details>
23
+
24
+ <div align="right">
25
+
26
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
27
+
28
+ </div>
29
+
5
30
  ### [Version 1.36.32](https://github.com/lobehub/lobe-chat/compare/v1.36.31...v1.36.32)
6
31
 
7
32
  <sup>Released on **2024-12-17**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
1
1
  [
2
+ {
3
+ "children": {
4
+ "fixes": [
5
+ "Fix GitHub model fetch."
6
+ ]
7
+ },
8
+ "date": "2024-12-18",
9
+ "version": "1.36.33"
10
+ },
2
11
  {
3
12
  "children": {
4
13
  "improvements": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.36.32",
3
+ "version": "1.36.33",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -15,7 +15,8 @@ const Github: ModelProviderCard = {
15
15
  vision: true,
16
16
  },
17
17
  {
18
- description: '专注于高级推理和解决复杂问题,包括数学和科学任务。非常适合需要深度上下文理解和自主工作流程的应用。',
18
+ description:
19
+ '专注于高级推理和解决复杂问题,包括数学和科学任务。非常适合需要深度上下文理解和自主工作流程的应用。',
19
20
  displayName: 'OpenAI o1-preview',
20
21
  enabled: true,
21
22
  functionCall: false,
@@ -45,7 +46,8 @@ const Github: ModelProviderCard = {
45
46
  vision: true,
46
47
  },
47
48
  {
48
- description: '一个52B参数(12B活跃)的多语言模型,提供256K长上下文窗口、函数调用、结构化输出和基于事实的生成。',
49
+ description:
50
+ '一个52B参数(12B活跃)的多语言模型,提供256K长上下文窗口、函数调用、结构化输出和基于事实的生成。',
49
51
  displayName: 'AI21 Jamba 1.5 Mini',
50
52
  functionCall: true,
51
53
  id: 'ai21-jamba-1.5-mini',
@@ -53,7 +55,8 @@ const Github: ModelProviderCard = {
53
55
  tokens: 262_144,
54
56
  },
55
57
  {
56
- description: '一个398B参数(94B活跃)的多语言模型,提供256K长上下文窗口、函数调用、结构化输出和基于事实的生成。',
58
+ description:
59
+ '一个398B参数(94B活跃)的多语言模型,提供256K长上下文窗口、函数调用、结构化输出和基于事实的生成。',
57
60
  displayName: 'AI21 Jamba 1.5 Large',
58
61
  functionCall: true,
59
62
  id: 'ai21-jamba-1.5-large',
@@ -61,7 +64,8 @@ const Github: ModelProviderCard = {
61
64
  tokens: 262_144,
62
65
  },
63
66
  {
64
- description: 'Command R是一个可扩展的生成模型,旨在针对RAG和工具使用,使企业能够实现生产级AI。',
67
+ description:
68
+ 'Command R是一个可扩展的生成模型,旨在针对RAG和工具使用,使企业能够实现生产级AI。',
65
69
  displayName: 'Cohere Command R',
66
70
  id: 'cohere-command-r',
67
71
  maxOutput: 4096,
@@ -75,7 +79,8 @@ const Github: ModelProviderCard = {
75
79
  tokens: 131_072,
76
80
  },
77
81
  {
78
- description: 'Mistral Nemo是一种尖端的语言模型(LLM),在其尺寸类别中拥有最先进的推理、世界知识和编码能力。',
82
+ description:
83
+ 'Mistral Nemo是一种尖端的语言模型(LLM),在其尺寸类别中拥有最先进的推理、世界知识和编码能力。',
79
84
  displayName: 'Mistral Nemo',
80
85
  id: 'mistral-nemo',
81
86
  maxOutput: 4096,
@@ -89,7 +94,8 @@ const Github: ModelProviderCard = {
89
94
  tokens: 131_072,
90
95
  },
91
96
  {
92
- description: 'Mistral的旗舰模型,适合需要大规模推理能力或高度专业化的复杂任务(合成文本生成、代码生成、RAG或代理)。',
97
+ description:
98
+ 'Mistral的旗舰模型,适合需要大规模推理能力或高度专业化的复杂任务(合成文本生成、代码生成、RAG或代理)。',
93
99
  displayName: 'Mistral Large',
94
100
  id: 'mistral-large',
95
101
  maxOutput: 4096,
@@ -112,21 +118,24 @@ const Github: ModelProviderCard = {
112
118
  vision: true,
113
119
  },
114
120
  {
115
- description: 'Llama 3.1指令调优的文本模型,针对多语言对话用例进行了优化,在许多可用的开源和封闭聊天模型中,在常见行业基准上表现优异。',
121
+ description:
122
+ 'Llama 3.1指令调优的文本模型,针对多语言对话用例进行了优化,在许多可用的开源和封闭聊天模型中,在常见行业基准上表现优异。',
116
123
  displayName: 'Meta Llama 3.1 8B',
117
124
  id: 'meta-llama-3.1-8b-instruct',
118
125
  maxOutput: 4096,
119
126
  tokens: 131_072,
120
127
  },
121
128
  {
122
- description: 'Llama 3.1指令调优的文本模型,针对多语言对话用例进行了优化,在许多可用的开源和封闭聊天模型中,在常见行业基准上表现优异。',
129
+ description:
130
+ 'Llama 3.1指令调优的文本模型,针对多语言对话用例进行了优化,在许多可用的开源和封闭聊天模型中,在常见行业基准上表现优异。',
123
131
  displayName: 'Meta Llama 3.1 70B',
124
132
  id: 'meta-llama-3.1-70b-instruct',
125
133
  maxOutput: 4096,
126
134
  tokens: 131_072,
127
135
  },
128
136
  {
129
- description: 'Llama 3.1指令调优的文本模型,针对多语言对话用例进行了优化,在许多可用的开源和封闭聊天模型中,在常见行业基准上表现优异。',
137
+ description:
138
+ 'Llama 3.1指令调优的文本模型,针对多语言对话用例进行了优化,在许多可用的开源和封闭聊天模型中,在常见行业基准上表现优异。',
130
139
  displayName: 'Meta Llama 3.1 405B',
131
140
  id: 'meta-llama-3.1-405b-instruct',
132
141
  maxOutput: 4096,
@@ -209,7 +218,7 @@ const Github: ModelProviderCard = {
209
218
  description: '通过GitHub模型,开发人员可以成为AI工程师,并使用行业领先的AI模型进行构建。',
210
219
  enabled: true,
211
220
  id: 'github',
212
- // modelList: { showModelFetcher: true },
221
+ modelList: { showModelFetcher: true }, // I'm not sure whether it is good to show the model fetcher, as the remote list is not complete.
213
222
  name: 'GitHub',
214
223
  url: 'https://github.com/marketplace/models',
215
224
  };
@@ -21,15 +21,10 @@ let instance: LobeOpenAICompatibleRuntime;
21
21
 
22
22
  beforeEach(() => {
23
23
  instance = new LobeGithubAI({ apiKey: 'test' });
24
-
25
- // Use vi.spyOn to mock the chat.completions.create method
26
- vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
27
- new ReadableStream() as any,
28
- );
29
24
  });
30
25
 
31
26
  afterEach(() => {
32
- vi.clearAllMocks();
27
+ vi.restoreAllMocks();
33
28
  });
34
29
 
35
30
  describe('LobeGithubAI', () => {
@@ -42,6 +37,13 @@ describe('LobeGithubAI', () => {
42
37
  });
43
38
 
44
39
  describe('chat', () => {
40
+ beforeEach(() => {
41
+ // Use vi.spyOn to mock the chat.completions.create method
42
+ vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
43
+ new ReadableStream() as any,
44
+ );
45
+ });
46
+
45
47
  describe('Error', () => {
46
48
  it('should return GithubBizError with an openai error response when OpenAI.APIError is thrown', async () => {
47
49
  // Arrange
@@ -119,41 +121,6 @@ describe('LobeGithubAI', () => {
119
121
  }
120
122
  });
121
123
 
122
- it('should return GithubBizError with an cause response with desensitize Url', async () => {
123
- // Arrange
124
- const errorInfo = {
125
- stack: 'abc',
126
- cause: { message: 'api is undefined' },
127
- };
128
- const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
129
-
130
- instance = new LobeGithubAI({
131
- apiKey: 'test',
132
- baseURL: 'https://api.abc.com/v1',
133
- });
134
-
135
- vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
136
-
137
- // Act
138
- try {
139
- await instance.chat({
140
- messages: [{ content: 'Hello', role: 'user' }],
141
- model: 'meta-llama-3-70b-instruct',
142
- temperature: 0.7,
143
- });
144
- } catch (e) {
145
- expect(e).toEqual({
146
- endpoint: 'https://api.***.com/v1',
147
- error: {
148
- cause: { message: 'api is undefined' },
149
- stack: 'abc',
150
- },
151
- errorType: bizErrorType,
152
- provider,
153
- });
154
- }
155
- });
156
-
157
124
  it('should throw an InvalidGithubToken error type on 401 status code', async () => {
158
125
  // Mock the API call to simulate a 401 error
159
126
  const error = new Error('InvalidApiKey') as any;
@@ -243,4 +210,64 @@ describe('LobeGithubAI', () => {
243
210
  });
244
211
  });
245
212
  });
213
+
214
+ describe('models', () => {
215
+ beforeEach(() => {});
216
+
217
+ it('should return a list of models', async () => {
218
+ // Arrange
219
+ const arr = [
220
+ {
221
+ id: 'azureml://registries/azureml-ai21/models/AI21-Jamba-Instruct/versions/2',
222
+ name: 'AI21-Jamba-Instruct',
223
+ friendly_name: 'AI21-Jamba-Instruct',
224
+ model_version: 2,
225
+ publisher: 'AI21 Labs',
226
+ model_family: 'AI21 Labs',
227
+ model_registry: 'azureml-ai21',
228
+ license: 'custom',
229
+ task: 'chat-completion',
230
+ description:
231
+ "Jamba-Instruct is the world's first production-grade Mamba-based LLM model and leverages its hybrid Mamba-Transformer architecture to achieve best-in-class performance, quality, and cost efficiency.\n\n**Model Developer Name**: _AI21 Labs_\n\n## Model Architecture\n\nJamba-Instruct leverages a hybrid Mamba-Transformer architecture to achieve best-in-class performance, quality, and cost efficiency.\nAI21's Jamba architecture features a blocks-and-layers approach that allows Jamba to successfully integrate the two architectures. Each Jamba block contains either an attention or a Mamba layer, followed by a multi-layer perceptron (MLP), producing an overall ratio of one Transformer layer out of every eight total layers.\n",
232
+ summary:
233
+ "Jamba-Instruct is the world's first production-grade Mamba-based LLM model and leverages its hybrid Mamba-Transformer architecture to achieve best-in-class performance, quality, and cost efficiency.",
234
+ tags: ['chat', 'rag'],
235
+ },
236
+ {
237
+ id: 'azureml://registries/azureml-cohere/models/Cohere-command-r/versions/3',
238
+ name: 'Cohere-command-r',
239
+ friendly_name: 'Cohere Command R',
240
+ model_version: 3,
241
+ publisher: 'cohere',
242
+ model_family: 'cohere',
243
+ model_registry: 'azureml-cohere',
244
+ license: 'custom',
245
+ task: 'chat-completion',
246
+ description:
247
+ "Command R is a highly performant generative large language model, optimized for a variety of use cases including reasoning, summarization, and question answering. \n\nThe model is optimized to perform well in the following languages: English, French, Spanish, Italian, German, Brazilian Portuguese, Japanese, Korean, Simplified Chinese, and Arabic.\n\nPre-training data additionally included the following 13 languages: Russian, Polish, Turkish, Vietnamese, Dutch, Czech, Indonesian, Ukrainian, Romanian, Greek, Hindi, Hebrew, Persian.\n\n## Resources\n\nFor full details of this model, [release blog post](https://aka.ms/cohere-blog).\n\n## Model Architecture\n\nThis is an auto-regressive language model that uses an optimized transformer architecture. After pretraining, this model uses supervised fine-tuning (SFT) and preference training to align model behavior to human preferences for helpfulness and safety.\n\n### Tool use capabilities\n\nCommand R has been specifically trained with conversational tool use capabilities. These have been trained into the model via a mixture of supervised fine-tuning and preference fine-tuning, using a specific prompt template. Deviating from this prompt template will likely reduce performance, but we encourage experimentation.\n\nCommand R's tool use functionality takes a conversation as input (with an optional user-system preamble), along with a list of available tools. The model will then generate a json-formatted list of actions to execute on a subset of those tools. Command R may use one of its supplied tools more than once.\n\nThe model has been trained to recognise a special directly_answer tool, which it uses to indicate that it doesn't want to use any of its other tools. The ability to abstain from calling a specific tool can be useful in a range of situations, such as greeting a user, or asking clarifying questions. 
We recommend including the directly_answer tool, but it can be removed or renamed if required.\n\n### Grounded Generation and RAG Capabilities\n\nCommand R has been specifically trained with grounded generation capabilities. This means that it can generate responses based on a list of supplied document snippets, and it will include grounding spans (citations) in its response indicating the source of the information. This can be used to enable behaviors such as grounded summarization and the final step of Retrieval Augmented Generation (RAG).This behavior has been trained into the model via a mixture of supervised fine-tuning and preference fine-tuning, using a specific prompt template. Deviating from this prompt template may reduce performance, but we encourage experimentation.\n\nCommand R's grounded generation behavior takes a conversation as input (with an optional user-supplied system preamble, indicating task, context and desired output style), along with a list of retrieved document snippets. The document snippets should be chunks, rather than long documents, typically around 100-400 words per chunk. Document snippets consist of key-value pairs. The keys should be short descriptive strings, the values can be text or semi-structured.\n\nBy default, Command R will generate grounded responses by first predicting which documents are relevant, then predicting which ones it will cite, then generating an answer. Finally, it will then insert grounding spans into the answer. See below for an example. This is referred to as accurate grounded generation.\n\nThe model is trained with a number of other answering modes, which can be selected by prompt changes . A fast citation mode is supported in the tokenizer, which will directly generate an answer with grounding spans in it, without first writing the answer out in full. 
This sacrifices some grounding accuracy in favor of generating fewer tokens.\n\n### Code Capabilities\n\nCommand R has been optimized to interact with your code, by requesting code snippets, code explanations, or code rewrites. It might not perform well out-of-the-box for pure code completion. For better performance, we also recommend using a low temperature (and even greedy decoding) for code-generation related instructions.\n",
248
+ summary:
249
+ 'Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise.',
250
+ tags: ['rag', 'multilingual'],
251
+ },
252
+ ];
253
+ vi.spyOn(instance['client'].models, 'list').mockResolvedValue({
254
+ body: arr,
255
+ } as any);
256
+
257
+ // Act & Assert
258
+ const models = await instance.models();
259
+
260
+ const modelsCount = models.length;
261
+ expect(modelsCount).toBe(arr.length);
262
+
263
+ for (let i = 0; i < arr.length; i++) {
264
+ const model = models[i];
265
+ expect(model).toEqual({
266
+ description: arr[i].description,
267
+ displayName: arr[i].friendly_name,
268
+ id: arr[i].name,
269
+ });
270
+ }
271
+ });
272
+ });
246
273
  });
@@ -1,7 +1,35 @@
1
+ import { LOBE_DEFAULT_MODEL_LIST } from '@/config/modelProviders';
2
+ import type { ChatModelCard } from '@/types/llm';
3
+
1
4
  import { AgentRuntimeErrorType } from '../error';
2
5
  import { o1Models, pruneO1Payload } from '../openai';
3
6
  import { ModelProvider } from '../types';
4
- import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
7
+ import {
8
+ CHAT_MODELS_BLOCK_LIST,
9
+ LobeOpenAICompatibleFactory,
10
+ } from '../utils/openaiCompatibleFactory';
11
+
12
+ enum Task {
13
+ 'chat-completion',
14
+ 'embeddings',
15
+ }
16
+
17
+ /* eslint-disable typescript-sort-keys/interface */
18
+ type Model = {
19
+ id: string;
20
+ name: string;
21
+ friendly_name: string;
22
+ model_version: number;
23
+ publisher: string;
24
+ model_family: string;
25
+ model_registry: string;
26
+ license: string;
27
+ task: Task;
28
+ description: string;
29
+ summary: string;
30
+ tags: string[];
31
+ };
32
+ /* eslint-enable typescript-sort-keys/interface */
5
33
 
6
34
  export const LobeGithubAI = LobeOpenAICompatibleFactory({
7
35
  baseURL: 'https://models.inference.ai.azure.com',
@@ -23,5 +51,27 @@ export const LobeGithubAI = LobeOpenAICompatibleFactory({
23
51
  bizError: AgentRuntimeErrorType.ProviderBizError,
24
52
  invalidAPIKey: AgentRuntimeErrorType.InvalidGithubToken,
25
53
  },
54
+ models: async ({ client }) => {
55
+ const modelsPage = (await client.models.list()) as any;
56
+ const modelList: Model[] = modelsPage.body;
57
+ return modelList
58
+ .filter((model) => {
59
+ return CHAT_MODELS_BLOCK_LIST.every(
60
+ (keyword) => !model.name.toLowerCase().includes(keyword),
61
+ );
62
+ })
63
+ .map((model) => {
64
+ const knownModel = LOBE_DEFAULT_MODEL_LIST.find((m) => m.id === model.name);
65
+
66
+ if (knownModel) return knownModel;
67
+
68
+ return {
69
+ description: model.description,
70
+ displayName: model.friendly_name,
71
+ id: model.name,
72
+ };
73
+ })
74
+ .filter(Boolean) as ChatModelCard[];
75
+ },
26
76
  provider: ModelProvider.Github,
27
77
  });
@@ -16,7 +16,8 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
16
16
  debug: {
17
17
  chatCompletion: () => process.env.DEBUG_TOGETHERAI_CHAT_COMPLETION === '1',
18
18
  },
19
- models: async ({ apiKey }) => {
19
+ models: async ({ client }) => {
20
+ const apiKey = client.apiKey;
20
21
  const data = await fetch(`${baseURL}/api/models`, {
21
22
  headers: {
22
23
  Authorization: `Bearer ${apiKey}`,
@@ -2,17 +2,18 @@ import OpenAI, { ClientOptions } from 'openai';
2
2
  import { Stream } from 'openai/streaming';
3
3
 
4
4
  import { LOBE_DEFAULT_MODEL_LIST } from '@/config/modelProviders';
5
- import { ChatModelCard } from '@/types/llm';
5
+ import type { ChatModelCard } from '@/types/llm';
6
6
 
7
7
  import { LobeRuntimeAI } from '../../BaseAI';
8
8
  import { AgentRuntimeErrorType, ILobeAgentRuntimeErrorType } from '../../error';
9
- import {
9
+ import type {
10
10
  ChatCompetitionOptions,
11
11
  ChatCompletionErrorPayload,
12
12
  ChatStreamPayload,
13
13
  Embeddings,
14
14
  EmbeddingsOptions,
15
15
  EmbeddingsPayload,
16
+ ModelProvider,
16
17
  TextToImagePayload,
17
18
  TextToSpeechOptions,
18
19
  TextToSpeechPayload,
@@ -26,7 +27,7 @@ import { StreamingResponse } from '../response';
26
27
  import { OpenAIStream, OpenAIStreamOptions } from '../streams';
27
28
 
28
29
  // a model whose name contains any of the following keywords is not a chat model, so we should filter it out
29
- const CHAT_MODELS_BLOCK_LIST = [
30
+ export const CHAT_MODELS_BLOCK_LIST = [
30
31
  'embedding',
31
32
  'davinci',
32
33
  'curie',
@@ -77,7 +78,7 @@ interface OpenAICompatibleFactoryOptions<T extends Record<string, any> = any> {
77
78
  invalidAPIKey: ILobeAgentRuntimeErrorType;
78
79
  };
79
80
  models?:
80
- | ((params: { apiKey: string }) => Promise<ChatModelCard[]>)
81
+ | ((params: { client: OpenAI }) => Promise<ChatModelCard[]>)
81
82
  | {
82
83
  transformModel?: (model: OpenAI.Model) => ChatModelCard;
83
84
  };
@@ -157,7 +158,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
157
158
  client!: OpenAI;
158
159
 
159
160
  baseURL!: string;
160
- private _options: ConstructorOptions<T>;
161
+ protected _options: ConstructorOptions<T>;
161
162
 
162
163
  constructor(options: ClientOptions & Record<string, any> = {}) {
163
164
  const _options = {
@@ -249,7 +250,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
249
250
  }
250
251
 
251
252
  async models() {
252
- if (typeof models === 'function') return models({ apiKey: this.client.apiKey });
253
+ if (typeof models === 'function') return models({ client: this.client });
253
254
 
254
255
  const list = await this.client.models.list();
255
256
 
@@ -312,7 +313,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
312
313
  }
313
314
  }
314
315
 
315
- private handleError(error: any): ChatCompletionErrorPayload {
316
+ protected handleError(error: any): ChatCompletionErrorPayload {
316
317
  let desensitizedEndpoint = this.baseURL;
317
318
 
318
319
  // refs: https://github.com/lobehub/lobe-chat/issues/842
@@ -337,7 +338,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
337
338
  endpoint: desensitizedEndpoint,
338
339
  error: error as any,
339
340
  errorType: ErrorType.invalidAPIKey,
340
- provider: provider as any,
341
+ provider: provider as ModelProvider,
341
342
  });
342
343
  }
343
344
 
@@ -353,7 +354,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
353
354
  endpoint: desensitizedEndpoint,
354
355
  error: errorResult,
355
356
  errorType: RuntimeError || ErrorType.bizError,
356
- provider: provider as any,
357
+ provider: provider as ModelProvider,
357
358
  });
358
359
  }
359
360
  };