@lobehub/chat 1.34.5 → 1.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,6 +61,14 @@ const TogetherAI: ModelProviderCard = {
  id: 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo',
  tokens: 130_815,
  },
+ {
+ description:
+ 'Llama 3.1 Nemotron 70B 是由 NVIDIA 定制的大型语言模型,旨在提高 LLM 生成的响应对用户查询的帮助程度。该模型在 Arena Hard、AlpacaEval 2 LC 和 GPT-4-Turbo MT-Bench 等基准测试中表现出色,截至 2024 年 10 月 1 日,在所有三个自动对齐基准测试中排名第一。该模型使用 RLHF(特别是 REINFORCE)、Llama-3.1-Nemotron-70B-Reward 和 HelpSteer2-Preference 提示在 Llama-3.1-70B-Instruct 模型基础上进行训练',
+ displayName: 'Llama 3.1 Nemotron 70B',
+ enabled: true,
+ id: 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
+ tokens: 32_768,
+ },
  {
  description: 'Llama 3 8B Instruct Turbo 是一款高效能的大语言模型,支持广泛的应用场景。',
  displayName: 'Llama 3 8B Instruct Turbo',
@@ -110,6 +118,12 @@ const TogetherAI: ModelProviderCard = {
  id: 'meta-llama/Llama-2-70b-hf',
  tokens: 4096,
  },
+ {
+ description: 'Code Llama 是一款专注于代码生成和讨论的 LLM,结合广泛的编程语言支持,适用于开发者环境。',
+ displayName: 'CodeLlama 34B Instruct',
+ id: 'codellama/CodeLlama-34b-Instruct-hf',
+ tokens: 16_384,
+ },
  {
  description: 'Gemma 2 9B 由Google开发,提供高效的指令响应和综合能力。',
  displayName: 'Gemma 2 9B',
@@ -177,6 +191,12 @@ const TogetherAI: ModelProviderCard = {
  id: 'mistralai/Mixtral-8x22B-Instruct-v0.1',
  tokens: 65_536,
  },
+ {
+ description: 'WizardLM 2 是微软AI提供的语言模型,在复杂对话、多语言、推理和智能助手领域表现尤为出色。',
+ displayName: 'WizardLM-2 8x22B',
+ id: 'microsoft/WizardLM-2-8x22B',
+ tokens: 65_536,
+ },
  {
  description: 'DeepSeek LLM Chat (67B) 是创新的 AI 模型 提供深度语言理解和互动能力。',
  displayName: 'DeepSeek LLM Chat (67B)',
@@ -184,6 +204,13 @@ const TogetherAI: ModelProviderCard = {
  id: 'deepseek-ai/deepseek-llm-67b-chat',
  tokens: 4096,
  },
+ {
+ description: 'QwQ模型是由 Qwen 团队开发的实验性研究模型,专注于增强 AI 推理能力。',
+ displayName: 'QwQ 32B Preview',
+ enabled: true,
+ id: 'Qwen/QwQ-32B-Preview',
+ tokens: 32_768,
+ },
  {
  description: 'Qwen2.5 是全新的大型语言模型系列,旨在优化指令式任务的处理。',
  displayName: 'Qwen 2.5 7B Instruct Turbo',
@@ -199,21 +226,15 @@ const TogetherAI: ModelProviderCard = {
  tokens: 32_768,
  },
  {
- description: 'Qwen 2 Instruct (72B) 为企业级应用提供精准的指令理解和响应。',
- displayName: 'Qwen 2 Instruct (72B)',
- id: 'Qwen/Qwen2-72B-Instruct',
- tokens: 32_768,
- },
- {
- description: 'Qwen 1.5 Chat (72B) 提供快速响应和自然对话能力,适合多语言环境。',
- displayName: 'Qwen 1.5 Chat (72B)',
- id: 'Qwen/Qwen1.5-72B-Chat',
+ description: 'Qwen2.5 Coder 32B Instruct 是阿里云发布的代码特定大语言模型系列的最新版本。该模型在 Qwen2.5 的基础上,通过 5.5 万亿个 tokens 的训练,显著提升了代码生成、推理和修复能力。它不仅增强了编码能力,还保持了数学和通用能力的优势。模型为代码智能体等实际应用提供了更全面的基础',
+ displayName: 'Qwen 2.5 Coder 32B Instruct',
+ id: 'Qwen/Qwen2.5-Coder-32B-Instruct',
  tokens: 32_768,
  },
  {
- description: 'Qwen 1.5 Chat (110B) 是一款高效能的对话模型,支持复杂对话场景。',
- displayName: 'Qwen 1.5 Chat (110B)',
- id: 'Qwen/Qwen1.5-110B-Chat',
+ description: 'Qwen 2 Instruct (72B) 为企业级应用提供精准的指令理解和响应。',
+ displayName: 'Qwen 2 Instruct (72B)',
+ id: 'Qwen/Qwen2-72B-Instruct',
  tokens: 32_768,
  },
  {
@@ -234,12 +255,6 @@ const TogetherAI: ModelProviderCard = {
  id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
  tokens: 32_768,
  },
- {
- description: 'Nous Hermes-2 Yi (34B) 提供优化的语言输出和多样化的应用可能。',
- displayName: 'Nous Hermes-2 Yi (34B)',
- id: 'NousResearch/Nous-Hermes-2-Yi-34B',
- tokens: 4096,
- },
  {
  description: 'MythoMax-L2 (13B) 是一种创新模型,适合多领域应用和复杂任务。',
  displayName: 'MythoMax-L2 (13B)',
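
All of the TogetherAI hunks above edit a single `chatModels` list on the provider card: four entries are added (Llama 3.1 Nemotron 70B, CodeLlama 34B Instruct, WizardLM-2 8x22B, QwQ 32B Preview), the Qwen 1.5 chat entries and Nous Hermes-2 Yi (34B) are dropped, and Qwen 2.5 Coder 32B Instruct takes the slot freed by Qwen 1.5 72B. The entry shape below is reconstructed only from the fields visible in this diff; the interface name and optionality are illustrative, not the package's exact type.

```ts
// Field names taken from the diff; the package's real model-card type may
// declare more properties than are shown here.
interface TogetherAIChatModelCard {
  description?: string; // summary text shown in the model picker
  displayName?: string; // label rendered in the UI
  enabled?: boolean;    // listed by default when true
  id: string;           // provider-side model identifier
  tokens?: number;      // context window size
}

// One of the entries added in this release, copied verbatim from the diff.
const qwq: TogetherAIChatModelCard = {
  description: 'QwQ模型是由 Qwen 团队开发的实验性研究模型,专注于增强 AI 推理能力。',
  displayName: 'QwQ 32B Preview',
  enabled: true,
  id: 'Qwen/QwQ-32B-Preview',
  tokens: 32_768,
};
```
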
@@ -1,4 +1,4 @@
- import { Ollama } from 'ollama/browser';
+ import { Ollama, Tool } from 'ollama/browser';
  import { ClientOptions } from 'openai';

  import { OpenAIChatMessage } from '@/libs/agent-runtime';
@@ -8,8 +8,9 @@ import { LobeRuntimeAI } from '../BaseAI';
  import { AgentRuntimeErrorType } from '../error';
  import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types';
  import { AgentRuntimeError } from '../utils/createError';
+ import { debugStream } from '../utils/debugStream';
  import { StreamingResponse } from '../utils/response';
- import { OllamaStream } from '../utils/streams';
+ import { OllamaStream, convertIterableToStream } from '../utils/streams';
  import { parseDataUri } from '../utils/uriParser';
  import { OllamaMessage } from './type';

@@ -45,23 +46,38 @@ export class LobeOllamaAI implements LobeRuntimeAI {
  options: {
  frequency_penalty: payload.frequency_penalty,
  presence_penalty: payload.presence_penalty,
- temperature:
- payload.temperature !== undefined
- ? payload.temperature / 2
- : undefined,
+ temperature: payload.temperature !== undefined ? payload.temperature / 2 : undefined,
  top_p: payload.top_p,
  },
  stream: true,
+ tools: payload.tools as Tool[],
  });

- return StreamingResponse(OllamaStream(response, options?.callback), {
+ const stream = convertIterableToStream(response);
+ const [prod, debug] = stream.tee();
+
+ if (process.env.DEBUG_OLLAMA_CHAT_COMPLETION === '1') {
+ debugStream(debug).catch(console.error);
+ }
+
+ return StreamingResponse(OllamaStream(prod, options?.callback), {
  headers: options?.headers,
  });
  } catch (error) {
- const e = error as { message: string; name: string; status_code: number };
+ const e = error as {
+ error: any;
+ message: string;
+ name: string;
+ status_code: number;
+ };

  throw AgentRuntimeError.chat({
- error: { message: e.message, name: e.name, status_code: e.status_code },
+ error: {
+ ...e.error,
+ message: String(e.error?.message || e.message),
+ name: e.name,
+ status_code: e.status_code,
+ },
  errorType: AgentRuntimeErrorType.OllamaBizError,
  provider: ModelProvider.Ollama,
  });
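
The runtime change above stops handing the Ollama SDK's AsyncIterable straight to `OllamaStream`: the iterable is first converted to a Web `ReadableStream`, then `tee()`d so one branch feeds the HTTP response while the other is drained by `debugStream` when `DEBUG_OLLAMA_CHAT_COMPLETION=1`. A minimal sketch of that pattern follows; the helpers here are illustrative stand-ins, not the package's actual `convertIterableToStream`/`debugStream` implementations.

```ts
// Turn an AsyncIterable into a ReadableStream so it can be tee()'d and piped.
function convertIterableToStream<T>(iterable: AsyncIterable<T>): ReadableStream<T> {
  const iterator = iterable[Symbol.asyncIterator]();
  return new ReadableStream<T>({
    async pull(controller) {
      const { done, value } = await iterator.next();
      if (done) controller.close();
      else controller.enqueue(value);
    },
  });
}

// Drain one branch and log every chunk; the other branch is unaffected.
async function debugStream(stream: ReadableStream<unknown>) {
  const reader = stream.getReader();
  for (let result = await reader.read(); !result.done; result = await reader.read()) {
    console.log('[ollama chunk]', result.value);
  }
}

// Usage mirroring the diff: only the debug branch is logged, and only when the flag is set.
// const [prod, debug] = convertIterableToStream(response).tee();
// if (process.env.DEBUG_OLLAMA_CHAT_COMPLETION === '1') debugStream(debug).catch(console.error);
```
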
@@ -6,61 +6,145 @@ import * as uuidModule from '@/utils/uuid';
  import { OllamaStream } from './ollama';

  describe('OllamaStream', () => {
- it('should transform Ollama stream to protocol stream', async () => {
- vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+ describe('should transform Ollama stream to protocol stream', () => {
+ it('text', async () => {
+ vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+ const mockOllamaStream = new ReadableStream<ChatResponse>({
+ start(controller) {
+ controller.enqueue({ message: { content: 'Hello' }, done: false } as ChatResponse);
+ controller.enqueue({ message: { content: ' world!' }, done: false } as ChatResponse);
+ controller.enqueue({ message: { content: '' }, done: true } as ChatResponse);
+
+ controller.close();
+ },
+ });
+
+ const onStartMock = vi.fn();
+ const onTextMock = vi.fn();
+ const onTokenMock = vi.fn();
+ const onCompletionMock = vi.fn();
+
+ const protocolStream = OllamaStream(mockOllamaStream, {
+ onStart: onStartMock,
+ onText: onTextMock,
+ onToken: onTokenMock,
+ onCompletion: onCompletionMock,
+ });
+
+ const decoder = new TextDecoder();
+ const chunks = [];

- const mockOllamaStream: AsyncIterable<ChatResponse> = {
  // @ts-ignore
- async *[Symbol.asyncIterator]() {
- yield { message: { content: 'Hello' }, done: false };
- yield { message: { content: ' world!' }, done: false };
- yield { message: { content: '' }, done: true };
- },
- };
-
- const onStartMock = vi.fn();
- const onTextMock = vi.fn();
- const onTokenMock = vi.fn();
- const onCompletionMock = vi.fn();
-
- const protocolStream = OllamaStream(mockOllamaStream, {
- onStart: onStartMock,
- onText: onTextMock,
- onToken: onTokenMock,
- onCompletion: onCompletionMock,
+ for await (const chunk of protocolStream) {
+ chunks.push(decoder.decode(chunk, { stream: true }));
+ }
+
+ expect(chunks).toEqual([
+ 'id: chat_1\n',
+ 'event: text\n',
+ `data: "Hello"\n\n`,
+ 'id: chat_1\n',
+ 'event: text\n',
+ `data: " world!"\n\n`,
+ 'id: chat_1\n',
+ 'event: stop\n',
+ `data: "finished"\n\n`,
+ ]);
+
+ expect(onStartMock).toHaveBeenCalledTimes(1);
+ expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
+ expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
+ expect(onTokenMock).toHaveBeenCalledTimes(2);
+ expect(onCompletionMock).toHaveBeenCalledTimes(1);
  });

- const decoder = new TextDecoder();
- const chunks = [];
+ it('tools use', async () => {
+ vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');

- // @ts-ignore
- for await (const chunk of protocolStream) {
- chunks.push(decoder.decode(chunk, { stream: true }));
- }
+ const mockOllamaStream = new ReadableStream<ChatResponse>({
+ start(controller) {
+ controller.enqueue({
+ model: 'qwen2.5',
+ created_at: new Date('2024-12-01T03:34:55.166692Z'),
+ message: {
+ role: 'assistant',
+ content: '',
+ tool_calls: [
+ {
+ function: {
+ name: 'realtime-weather____fetchCurrentWeather',
+ arguments: { city: '杭州' },
+ },
+ },
+ ],
+ },
+ done: false,
+ } as unknown as ChatResponse);
+ controller.enqueue({
+ model: 'qwen2.5',
+ created_at: '2024-12-01T03:34:55.2133Z',
+ message: { role: 'assistant', content: '' },
+ done_reason: 'stop',
+ done: true,
+ total_duration: 1122415333,
+ load_duration: 26178333,
+ prompt_eval_count: 221,
+ prompt_eval_duration: 507000000,
+ eval_count: 26,
+ eval_duration: 583000000,
+ } as unknown as ChatResponse);
+
+ controller.close();
+ },
+ });
+ const onStartMock = vi.fn();
+ const onTextMock = vi.fn();
+ const onTokenMock = vi.fn();
+ const onToolCall = vi.fn();
+ const onCompletionMock = vi.fn();

- expect(chunks).toEqual([
- 'id: chat_1\n',
- 'event: text\n',
- `data: "Hello"\n\n`,
- 'id: chat_1\n',
- 'event: text\n',
- `data: " world!"\n\n`,
- 'id: chat_1\n',
- 'event: stop\n',
- `data: "finished"\n\n`,
- ]);
-
- expect(onStartMock).toHaveBeenCalledTimes(1);
- expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
- expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
- expect(onTokenMock).toHaveBeenCalledTimes(2);
- expect(onCompletionMock).toHaveBeenCalledTimes(1);
+ const protocolStream = OllamaStream(mockOllamaStream, {
+ onStart: onStartMock,
+ onText: onTextMock,
+ onToken: onTokenMock,
+ onCompletion: onCompletionMock,
+ onToolCall,
+ });
+
+ const decoder = new TextDecoder();
+ const chunks = [];
+
+ // @ts-ignore
+ for await (const chunk of protocolStream) {
+ chunks.push(decoder.decode(chunk, { stream: true }));
+ }
+
+ expect(chunks).toEqual(
+ [
+ 'id: chat_1',
+ 'event: tool_calls',
+ `data: [{"function":{"arguments":"{\\"city\\":\\"杭州\\"}","name":"realtime-weather____fetchCurrentWeather"},"id":"realtime-weather____fetchCurrentWeather_0","index":0,"type":"function"}]\n`,
+ 'id: chat_1',
+ 'event: stop',
+ `data: "finished"\n`,
+ ].map((i) => `${i}\n`),
+ );
+
+ expect(onTextMock).toHaveBeenCalledTimes(0);
+ expect(onStartMock).toHaveBeenCalledTimes(1);
+ expect(onToolCall).toHaveBeenCalledTimes(1);
+ expect(onTokenMock).toHaveBeenCalledTimes(0);
+ expect(onCompletionMock).toHaveBeenCalledTimes(1);
+ });
  });

  it('should handle empty stream', async () => {
- const mockOllamaStream = {
- async *[Symbol.asyncIterator]() {},
- };
+ const mockOllamaStream = new ReadableStream<ChatResponse>({
+ start(controller) {
+ controller.close();
+ },
+ });

  const protocolStream = OllamaStream(mockOllamaStream);

@@ -6,27 +6,42 @@ import { nanoid } from '@/utils/uuid';
  import {
  StreamProtocolChunk,
  StreamStack,
- convertIterableToStream,
  createCallbacksTransformer,
  createSSEProtocolTransformer,
+ generateToolCallId,
  } from './protocol';

  const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => {
  // maybe need another structure to add support for multiple choices
- if (chunk.done) {
+ if (chunk.done && !chunk.message.content) {
  return { data: 'finished', id: stack.id, type: 'stop' };
  }

+ if (chunk.message.tool_calls && chunk.message.tool_calls.length > 0) {
+ return {
+ data: chunk.message.tool_calls.map((value, index) => ({
+ function: {
+ arguments: JSON.stringify(value.function?.arguments) ?? '{}',
+ name: value.function?.name ?? null,
+ },
+ id: generateToolCallId(index, value.function?.name),
+ index: index,
+ type: 'function',
+ })),
+ id: stack.id,
+ type: 'tool_calls',
+ };
+ }
  return { data: chunk.message.content, id: stack.id, type: 'text' };
  };

  export const OllamaStream = (
- res: AsyncIterable<ChatResponse>,
+ res: ReadableStream<ChatResponse>,
  cb?: ChatStreamCallbacks,
  ): ReadableStream<string> => {
  const streamStack: StreamStack = { id: 'chat_' + nanoid() };

- return convertIterableToStream(res)
+ return res
  .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack))
  .pipeThrough(createCallbacksTransformer(cb));
  };
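
Two behavioral changes in `transformOllamaStream` are easy to miss in the hunk above: the stop branch now fires only when the chunk is `done` and carries no content, and chunks with `tool_calls` are mapped into a `tool_calls` protocol chunk (with ids like `realtime-weather____fetchCurrentWeather_0`, as the test earlier in this diff expects). Below is a self-contained restatement of the new branch order, for illustration only; it is not the exported API.

```ts
// Minimal local type; the real code uses ChatResponse from the ollama package.
type MinimalChatChunk = {
  done: boolean;
  message: { content: string; tool_calls?: unknown[] };
};

// Same decision order as the updated transformOllamaStream above.
const classify = (chunk: MinimalChatChunk): 'stop' | 'tool_calls' | 'text' => {
  if (chunk.done && !chunk.message.content) return 'stop'; // done AND empty content → stop
  if (chunk.message.tool_calls && chunk.message.tool_calls.length > 0) return 'tool_calls';
  return 'text';
};

classify({ done: true, message: { content: '' } });    // 'stop'
classify({ done: true, message: { content: 'bye' } }); // 'text' — before this change, any done chunk became 'stop'
```
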
@@ -134,6 +134,7 @@ describe('initAgentRuntimeWithUserPayload method', () => {
  const runtime = await initAgentRuntimeWithUserPayload(ModelProvider.Ollama, jwtPayload);
  expect(runtime).toBeInstanceOf(AgentRuntime);
  expect(runtime['_runtime']).toBeInstanceOf(LobeOllamaAI);
+ expect(runtime['_runtime']['baseURL']).toEqual(jwtPayload.endpoint);
  });

  it('Perplexity AI provider: with apikey', async () => {
@@ -391,7 +392,7 @@ describe('initAgentRuntimeWithUserPayload method', () => {
  // endpoint 不存在,应返回 DEFAULT_BASE_URL
  expect(runtime['_runtime'].baseURL).toBe('https://dashscope.aliyuncs.com/compatible-mode/v1');
  });
-
+
  it('Unknown Provider', async () => {
  const jwtPayload = {};
  const runtime = await initAgentRuntimeWithUserPayload('unknown', jwtPayload);
@@ -33,7 +33,7 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {
  default: {
  let upperProvider = provider.toUpperCase();

- if (!( `${upperProvider}_API_KEY` in llmConfig)) {
+ if (!(`${upperProvider}_API_KEY` in llmConfig)) {
  upperProvider = ModelProvider.OpenAI.toUpperCase(); // Use OpenAI options as default
  }

@@ -43,6 +43,12 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {
  return baseURL ? { apiKey, baseURL } : { apiKey };
  }

+ case ModelProvider.Ollama: {
+ const baseURL = payload?.endpoint || process.env.OLLAMA_PROXY_URL;
+
+ return { baseURL };
+ }
+
  case ModelProvider.Azure: {
  const { AZURE_API_KEY, AZURE_API_VERSION, AZURE_ENDPOINT } = llmConfig;
  const apikey = apiKeyManager.pick(payload?.apiKey || AZURE_API_KEY);
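
The new `ModelProvider.Ollama` branch above resolves the runtime's `baseURL` from the per-request JWT payload first and only then from the server-wide `OLLAMA_PROXY_URL` environment variable; the test added earlier in this diff asserts that the payload endpoint is what ends up on the runtime. A hedged sketch of that precedence, using an illustrative helper name:

```ts
// Illustrative helper, not the package's API: payload endpoint wins, then the
// env proxy URL; if neither is set, baseURL stays undefined and the Ollama
// runtime presumably falls back to its built-in default.
const resolveOllamaBaseURL = (payloadEndpoint?: string): string | undefined =>
  payloadEndpoint || process.env.OLLAMA_PROXY_URL;

resolveOllamaBaseURL('http://localhost:11434'); // => 'http://localhost:11434'
resolveOllamaBaseURL(undefined);                // => process.env.OLLAMA_PROXY_URL (or undefined)
```
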