@lobehub/chat 1.34.5 → 1.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,6 +61,14 @@ const TogetherAI: ModelProviderCard = {
  id: 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo',
  tokens: 130_815,
  },
+ {
+ description:
+ 'Llama 3.1 Nemotron 70B 是由 NVIDIA 定制的大型语言模型,旨在提高 LLM 生成的响应对用户查询的帮助程度。该模型在 Arena Hard、AlpacaEval 2 LC 和 GPT-4-Turbo MT-Bench 等基准测试中表现出色,截至 2024 年 10 月 1 日,在所有三个自动对齐基准测试中排名第一。该模型使用 RLHF(特别是 REINFORCE)、Llama-3.1-Nemotron-70B-Reward 和 HelpSteer2-Preference 提示在 Llama-3.1-70B-Instruct 模型基础上进行训练',
+ displayName: 'Llama 3.1 Nemotron 70B',
+ enabled: true,
+ id: 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
+ tokens: 32_768,
+ },
  {
  description: 'Llama 3 8B Instruct Turbo 是一款高效能的大语言模型,支持广泛的应用场景。',
  displayName: 'Llama 3 8B Instruct Turbo',
@@ -110,6 +118,12 @@ const TogetherAI: ModelProviderCard = {
  id: 'meta-llama/Llama-2-70b-hf',
  tokens: 4096,
  },
+ {
+ description: 'Code Llama 是一款专注于代码生成和讨论的 LLM,结合广泛的编程语言支持,适用于开发者环境。',
+ displayName: 'CodeLlama 34B Instruct',
+ id: 'codellama/CodeLlama-34b-Instruct-hf',
+ tokens: 16_384,
+ },
  {
  description: 'Gemma 2 9B 由Google开发,提供高效的指令响应和综合能力。',
  displayName: 'Gemma 2 9B',
@@ -177,6 +191,12 @@ const TogetherAI: ModelProviderCard = {
  id: 'mistralai/Mixtral-8x22B-Instruct-v0.1',
  tokens: 65_536,
  },
+ {
+ description: 'WizardLM 2 是微软AI提供的语言模型,在复杂对话、多语言、推理和智能助手领域表现尤为出色。',
+ displayName: 'WizardLM-2 8x22B',
+ id: 'microsoft/WizardLM-2-8x22B',
+ tokens: 65_536,
+ },
  {
  description: 'DeepSeek LLM Chat (67B) 是创新的 AI 模型 提供深度语言理解和互动能力。',
  displayName: 'DeepSeek LLM Chat (67B)',
@@ -184,6 +204,13 @@ const TogetherAI: ModelProviderCard = {
  id: 'deepseek-ai/deepseek-llm-67b-chat',
  tokens: 4096,
  },
+ {
+ description: 'QwQ模型是由 Qwen 团队开发的实验性研究模型,专注于增强 AI 推理能力。',
+ displayName: 'QwQ 32B Preview',
+ enabled: true,
+ id: 'Qwen/QwQ-32B-Preview',
+ tokens: 32_768,
+ },
  {
  description: 'Qwen2.5 是全新的大型语言模型系列,旨在优化指令式任务的处理。',
  displayName: 'Qwen 2.5 7B Instruct Turbo',
@@ -199,21 +226,15 @@ const TogetherAI: ModelProviderCard = {
  tokens: 32_768,
  },
  {
- description: 'Qwen 2 Instruct (72B) 为企业级应用提供精准的指令理解和响应。',
- displayName: 'Qwen 2 Instruct (72B)',
- id: 'Qwen/Qwen2-72B-Instruct',
- tokens: 32_768,
- },
- {
- description: 'Qwen 1.5 Chat (72B) 提供快速响应和自然对话能力,适合多语言环境。',
- displayName: 'Qwen 1.5 Chat (72B)',
- id: 'Qwen/Qwen1.5-72B-Chat',
+ description: 'Qwen2.5 Coder 32B Instruct 是阿里云发布的代码特定大语言模型系列的最新版本。该模型在 Qwen2.5 的基础上,通过 5.5 万亿个 tokens 的训练,显著提升了代码生成、推理和修复能力。它不仅增强了编码能力,还保持了数学和通用能力的优势。模型为代码智能体等实际应用提供了更全面的基础',
+ displayName: 'Qwen 2.5 Coder 32B Instruct',
+ id: 'Qwen/Qwen2.5-Coder-32B-Instruct',
  tokens: 32_768,
  },
  {
- description: 'Qwen 1.5 Chat (110B) 是一款高效能的对话模型,支持复杂对话场景。',
- displayName: 'Qwen 1.5 Chat (110B)',
- id: 'Qwen/Qwen1.5-110B-Chat',
+ description: 'Qwen 2 Instruct (72B) 为企业级应用提供精准的指令理解和响应。',
+ displayName: 'Qwen 2 Instruct (72B)',
+ id: 'Qwen/Qwen2-72B-Instruct',
  tokens: 32_768,
  },
  {
@@ -234,12 +255,6 @@ const TogetherAI: ModelProviderCard = {
  id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
  tokens: 32_768,
  },
- {
- description: 'Nous Hermes-2 Yi (34B) 提供优化的语言输出和多样化的应用可能。',
- displayName: 'Nous Hermes-2 Yi (34B)',
- id: 'NousResearch/Nous-Hermes-2-Yi-34B',
- tokens: 4096,
- },
  {
  description: 'MythoMax-L2 (13B) 是一种创新模型,适合多领域应用和复杂任务。',
  displayName: 'MythoMax-L2 (13B)',
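
All of the TogetherAI hunks above edit a single `chatModels` list on the provider card: four entries are added (Llama 3.1 Nemotron 70B, CodeLlama 34B Instruct, WizardLM-2 8x22B, QwQ 32B Preview), the Qwen 1.5 chat entries and Nous Hermes-2 Yi (34B) are dropped, and Qwen 2.5 Coder 32B Instruct takes the slot freed by Qwen 1.5 72B. The entry shape below is reconstructed only from the fields visible in this diff; the interface name and optionality are illustrative, not the package's exact type.

```ts
// Field names taken from the diff; the package's real model-card type may
// declare more properties than are shown here.
interface TogetherAIChatModelCard {
  description?: string; // summary text shown in the model picker
  displayName?: string; // label rendered in the UI
  enabled?: boolean;    // listed by default when true
  id: string;           // provider-side model identifier
  tokens?: number;      // context window size
}

// One of the entries added in this release, copied verbatim from the diff.
const qwq: TogetherAIChatModelCard = {
  description: 'QwQ模型是由 Qwen 团队开发的实验性研究模型,专注于增强 AI 推理能力。',
  displayName: 'QwQ 32B Preview',
  enabled: true,
  id: 'Qwen/QwQ-32B-Preview',
  tokens: 32_768,
};
```
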
@@ -1,4 +1,4 @@
- import { Ollama } from 'ollama/browser';
+ import { Ollama, Tool } from 'ollama/browser';
  import { ClientOptions } from 'openai';

  import { OpenAIChatMessage } from '@/libs/agent-runtime';
@@ -8,8 +8,9 @@ import { LobeRuntimeAI } from '../BaseAI';
  import { AgentRuntimeErrorType } from '../error';
  import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types';
  import { AgentRuntimeError } from '../utils/createError';
+ import { debugStream } from '../utils/debugStream';
  import { StreamingResponse } from '../utils/response';
- import { OllamaStream } from '../utils/streams';
+ import { OllamaStream, convertIterableToStream } from '../utils/streams';
  import { parseDataUri } from '../utils/uriParser';
  import { OllamaMessage } from './type';

@@ -45,23 +46,38 @@ export class LobeOllamaAI implements LobeRuntimeAI {
  options: {
  frequency_penalty: payload.frequency_penalty,
  presence_penalty: payload.presence_penalty,
- temperature:
- payload.temperature !== undefined
- ? payload.temperature / 2
- : undefined,
+ temperature: payload.temperature !== undefined ? payload.temperature / 2 : undefined,
  top_p: payload.top_p,
  },
  stream: true,
+ tools: payload.tools as Tool[],
  });

- return StreamingResponse(OllamaStream(response, options?.callback), {
+ const stream = convertIterableToStream(response);
+ const [prod, debug] = stream.tee();
+
+ if (process.env.DEBUG_OLLAMA_CHAT_COMPLETION === '1') {
+ debugStream(debug).catch(console.error);
+ }
+
+ return StreamingResponse(OllamaStream(prod, options?.callback), {
  headers: options?.headers,
  });
  } catch (error) {
- const e = error as { message: string; name: string; status_code: number };
+ const e = error as {
+ error: any;
+ message: string;
+ name: string;
+ status_code: number;
+ };

  throw AgentRuntimeError.chat({
- error: { message: e.message, name: e.name, status_code: e.status_code },
+ error: {
+ ...e.error,
+ message: String(e.error?.message || e.message),
+ name: e.name,
+ status_code: e.status_code,
+ },
  errorType: AgentRuntimeErrorType.OllamaBizError,
  provider: ModelProvider.Ollama,
  });
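
The runtime change above stops handing the Ollama SDK's AsyncIterable straight to `OllamaStream`: the iterable is first converted to a Web `ReadableStream`, then `tee()`d so one branch feeds the HTTP response while the other is drained by `debugStream` when `DEBUG_OLLAMA_CHAT_COMPLETION=1`. A minimal sketch of that pattern follows; the helpers here are illustrative stand-ins, not the package's actual `convertIterableToStream`/`debugStream` implementations.

```ts
// Turn an AsyncIterable into a ReadableStream so it can be tee()'d and piped.
function convertIterableToStream<T>(iterable: AsyncIterable<T>): ReadableStream<T> {
  const iterator = iterable[Symbol.asyncIterator]();
  return new ReadableStream<T>({
    async pull(controller) {
      const { done, value } = await iterator.next();
      if (done) controller.close();
      else controller.enqueue(value);
    },
  });
}

// Drain one branch and log every chunk; the other branch is unaffected.
async function debugStream(stream: ReadableStream<unknown>) {
  const reader = stream.getReader();
  for (let result = await reader.read(); !result.done; result = await reader.read()) {
    console.log('[ollama chunk]', result.value);
  }
}

// Usage mirroring the diff: only the debug branch is logged, and only when the flag is set.
// const [prod, debug] = convertIterableToStream(response).tee();
// if (process.env.DEBUG_OLLAMA_CHAT_COMPLETION === '1') debugStream(debug).catch(console.error);
```
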
@@ -6,61 +6,145 @@ import * as uuidModule from '@/utils/uuid';
  import { OllamaStream } from './ollama';

  describe('OllamaStream', () => {
- it('should transform Ollama stream to protocol stream', async () => {
- vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+ describe('should transform Ollama stream to protocol stream', () => {
+ it('text', async () => {
+ vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+ const mockOllamaStream = new ReadableStream<ChatResponse>({
+ start(controller) {
+ controller.enqueue({ message: { content: 'Hello' }, done: false } as ChatResponse);
+ controller.enqueue({ message: { content: ' world!' }, done: false } as ChatResponse);
+ controller.enqueue({ message: { content: '' }, done: true } as ChatResponse);
+
+ controller.close();
+ },
+ });
+
+ const onStartMock = vi.fn();
+ const onTextMock = vi.fn();
+ const onTokenMock = vi.fn();
+ const onCompletionMock = vi.fn();
+
+ const protocolStream = OllamaStream(mockOllamaStream, {
+ onStart: onStartMock,
+ onText: onTextMock,
+ onToken: onTokenMock,
+ onCompletion: onCompletionMock,
+ });
+
+ const decoder = new TextDecoder();
+ const chunks = [];

- const mockOllamaStream: AsyncIterable<ChatResponse> = {
  // @ts-ignore
- async *[Symbol.asyncIterator]() {
- yield { message: { content: 'Hello' }, done: false };
- yield { message: { content: ' world!' }, done: false };
- yield { message: { content: '' }, done: true };
- },
- };
-
- const onStartMock = vi.fn();
- const onTextMock = vi.fn();
- const onTokenMock = vi.fn();
- const onCompletionMock = vi.fn();
-
- const protocolStream = OllamaStream(mockOllamaStream, {
- onStart: onStartMock,
- onText: onTextMock,
- onToken: onTokenMock,
- onCompletion: onCompletionMock,
+ for await (const chunk of protocolStream) {
+ chunks.push(decoder.decode(chunk, { stream: true }));
+ }
+
+ expect(chunks).toEqual([
+ 'id: chat_1\n',
+ 'event: text\n',
+ `data: "Hello"\n\n`,
+ 'id: chat_1\n',
+ 'event: text\n',
+ `data: " world!"\n\n`,
+ 'id: chat_1\n',
+ 'event: stop\n',
+ `data: "finished"\n\n`,
+ ]);
+
+ expect(onStartMock).toHaveBeenCalledTimes(1);
+ expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
+ expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
+ expect(onTokenMock).toHaveBeenCalledTimes(2);
+ expect(onCompletionMock).toHaveBeenCalledTimes(1);
  });

- const decoder = new TextDecoder();
- const chunks = [];
+ it('tools use', async () => {
+ vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');

- // @ts-ignore
- for await (const chunk of protocolStream) {
- chunks.push(decoder.decode(chunk, { stream: true }));
- }
+ const mockOllamaStream = new ReadableStream<ChatResponse>({
+ start(controller) {
+ controller.enqueue({
+ model: 'qwen2.5',
+ created_at: new Date('2024-12-01T03:34:55.166692Z'),
+ message: {
+ role: 'assistant',
+ content: '',
+ tool_calls: [
+ {
+ function: {
+ name: 'realtime-weather____fetchCurrentWeather',
+ arguments: { city: '杭州' },
+ },
+ },
+ ],
+ },
+ done: false,
+ } as unknown as ChatResponse);
+ controller.enqueue({
+ model: 'qwen2.5',
+ created_at: '2024-12-01T03:34:55.2133Z',
+ message: { role: 'assistant', content: '' },
+ done_reason: 'stop',
+ done: true,
+ total_duration: 1122415333,
+ load_duration: 26178333,
+ prompt_eval_count: 221,
+ prompt_eval_duration: 507000000,
+ eval_count: 26,
+ eval_duration: 583000000,
+ } as unknown as ChatResponse);
+
+ controller.close();
+ },
+ });
+ const onStartMock = vi.fn();
+ const onTextMock = vi.fn();
+ const onTokenMock = vi.fn();
+ const onToolCall = vi.fn();
+ const onCompletionMock = vi.fn();

- expect(chunks).toEqual([
- 'id: chat_1\n',
- 'event: text\n',
- `data: "Hello"\n\n`,
- 'id: chat_1\n',
- 'event: text\n',
- `data: " world!"\n\n`,
- 'id: chat_1\n',
- 'event: stop\n',
- `data: "finished"\n\n`,
- ]);
-
- expect(onStartMock).toHaveBeenCalledTimes(1);
- expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
- expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
- expect(onTokenMock).toHaveBeenCalledTimes(2);
- expect(onCompletionMock).toHaveBeenCalledTimes(1);
+ const protocolStream = OllamaStream(mockOllamaStream, {
+ onStart: onStartMock,
+ onText: onTextMock,
+ onToken: onTokenMock,
+ onCompletion: onCompletionMock,
+ onToolCall,
+ });
+
+ const decoder = new TextDecoder();
+ const chunks = [];
+
+ // @ts-ignore
+ for await (const chunk of protocolStream) {
+ chunks.push(decoder.decode(chunk, { stream: true }));
+ }
+
+ expect(chunks).toEqual(
+ [
+ 'id: chat_1',
+ 'event: tool_calls',
+ `data: [{"function":{"arguments":"{\\"city\\":\\"杭州\\"}","name":"realtime-weather____fetchCurrentWeather"},"id":"realtime-weather____fetchCurrentWeather_0","index":0,"type":"function"}]\n`,
+ 'id: chat_1',
+ 'event: stop',
+ `data: "finished"\n`,
+ ].map((i) => `${i}\n`),
+ );
+
+ expect(onTextMock).toHaveBeenCalledTimes(0);
+ expect(onStartMock).toHaveBeenCalledTimes(1);
+ expect(onToolCall).toHaveBeenCalledTimes(1);
+ expect(onTokenMock).toHaveBeenCalledTimes(0);
+ expect(onCompletionMock).toHaveBeenCalledTimes(1);
+ });
  });

  it('should handle empty stream', async () => {
- const mockOllamaStream = {
- async *[Symbol.asyncIterator]() {},
- };
+ const mockOllamaStream = new ReadableStream<ChatResponse>({
+ start(controller) {
+ controller.close();
+ },
+ });

  const protocolStream = OllamaStream(mockOllamaStream);

@@ -6,27 +6,42 @@ import { nanoid } from '@/utils/uuid';
  import {
  StreamProtocolChunk,
  StreamStack,
- convertIterableToStream,
  createCallbacksTransformer,
  createSSEProtocolTransformer,
+ generateToolCallId,
  } from './protocol';

  const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => {
  // maybe need another structure to add support for multiple choices
- if (chunk.done) {
+ if (chunk.done && !chunk.message.content) {
  return { data: 'finished', id: stack.id, type: 'stop' };
  }

+ if (chunk.message.tool_calls && chunk.message.tool_calls.length > 0) {
+ return {
+ data: chunk.message.tool_calls.map((value, index) => ({
+ function: {
+ arguments: JSON.stringify(value.function?.arguments) ?? '{}',
+ name: value.function?.name ?? null,
+ },
+ id: generateToolCallId(index, value.function?.name),
+ index: index,
+ type: 'function',
+ })),
+ id: stack.id,
+ type: 'tool_calls',
+ };
+ }
  return { data: chunk.message.content, id: stack.id, type: 'text' };
  };

  export const OllamaStream = (
- res: AsyncIterable<ChatResponse>,
+ res: ReadableStream<ChatResponse>,
  cb?: ChatStreamCallbacks,
  ): ReadableStream<string> => {
  const streamStack: StreamStack = { id: 'chat_' + nanoid() };

- return convertIterableToStream(res)
+ return res
  .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack))
  .pipeThrough(createCallbacksTransformer(cb));
  };
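
Two behavioral changes in `transformOllamaStream` are easy to miss in the hunk above: the stop branch now fires only when the chunk is `done` and carries no content, and chunks with `tool_calls` are mapped into a `tool_calls` protocol chunk (with ids like `realtime-weather____fetchCurrentWeather_0`, as the test earlier in this diff expects). Below is a self-contained restatement of the new branch order, for illustration only; it is not the exported API.

```ts
// Minimal local type; the real code uses ChatResponse from the ollama package.
type MinimalChatChunk = {
  done: boolean;
  message: { content: string; tool_calls?: unknown[] };
};

// Same decision order as the updated transformOllamaStream above.
const classify = (chunk: MinimalChatChunk): 'stop' | 'tool_calls' | 'text' => {
  if (chunk.done && !chunk.message.content) return 'stop'; // done AND empty content → stop
  if (chunk.message.tool_calls && chunk.message.tool_calls.length > 0) return 'tool_calls';
  return 'text';
};

classify({ done: true, message: { content: '' } });    // 'stop'
classify({ done: true, message: { content: 'bye' } }); // 'text' — before this change, any done chunk became 'stop'
```
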
@@ -134,6 +134,7 @@ describe('initAgentRuntimeWithUserPayload method', () => {
  const runtime = await initAgentRuntimeWithUserPayload(ModelProvider.Ollama, jwtPayload);
  expect(runtime).toBeInstanceOf(AgentRuntime);
  expect(runtime['_runtime']).toBeInstanceOf(LobeOllamaAI);
+ expect(runtime['_runtime']['baseURL']).toEqual(jwtPayload.endpoint);
  });

  it('Perplexity AI provider: with apikey', async () => {
@@ -391,7 +392,7 @@ describe('initAgentRuntimeWithUserPayload method', () => {
  // endpoint 不存在,应返回 DEFAULT_BASE_URL
  expect(runtime['_runtime'].baseURL).toBe('https://dashscope.aliyuncs.com/compatible-mode/v1');
  });
-
+
  it('Unknown Provider', async () => {
  const jwtPayload = {};
  const runtime = await initAgentRuntimeWithUserPayload('unknown', jwtPayload);
@@ -33,7 +33,7 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {
  default: {
  let upperProvider = provider.toUpperCase();

- if (!( `${upperProvider}_API_KEY` in llmConfig)) {
+ if (!(`${upperProvider}_API_KEY` in llmConfig)) {
  upperProvider = ModelProvider.OpenAI.toUpperCase(); // Use OpenAI options as default
  }

@@ -43,6 +43,12 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {
  return baseURL ? { apiKey, baseURL } : { apiKey };
  }

+ case ModelProvider.Ollama: {
+ const baseURL = payload?.endpoint || process.env.OLLAMA_PROXY_URL;
+
+ return { baseURL };
+ }
+
  case ModelProvider.Azure: {
  const { AZURE_API_KEY, AZURE_API_VERSION, AZURE_ENDPOINT } = llmConfig;
  const apikey = apiKeyManager.pick(payload?.apiKey || AZURE_API_KEY);
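
The new `ModelProvider.Ollama` branch above resolves the runtime's `baseURL` from the per-request JWT payload first and only then from the server-wide `OLLAMA_PROXY_URL` environment variable; the test added earlier in this diff asserts that the payload endpoint is what ends up on the runtime. A hedged sketch of that precedence, using an illustrative helper name:

```ts
// Illustrative helper, not the package's API: payload endpoint wins, then the
// env proxy URL; if neither is set, baseURL stays undefined and the Ollama
// runtime presumably falls back to its built-in default.
const resolveOllamaBaseURL = (payloadEndpoint?: string): string | undefined =>
  payloadEndpoint || process.env.OLLAMA_PROXY_URL;

resolveOllamaBaseURL('http://localhost:11434'); // => 'http://localhost:11434'
resolveOllamaBaseURL(undefined);                // => process.env.OLLAMA_PROXY_URL (or undefined)
```
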