@lobehub/chat 1.84.25 → 1.84.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/changelog/v1.json +18 -0
- package/package.json +3 -3
- package/src/config/aiModels/ollama.ts +12 -2
- package/src/config/aiModels/vertexai.ts +6 -6
- package/src/libs/agent-runtime/google/index.ts +2 -1
- package/src/libs/agent-runtime/utils/streams/google-ai.test.ts +101 -6
- package/src/libs/agent-runtime/utils/streams/google-ai.ts +62 -38
- package/src/libs/agent-runtime/utils/streams/protocol.ts +24 -4
- package/src/libs/agent-runtime/utils/streams/vertex-ai.test.ts +109 -8
- package/src/libs/agent-runtime/utils/streams/vertex-ai.ts +68 -23
- package/src/libs/langchain/loaders/epub/index.ts +4 -2
- package/src/libs/mcp/__tests__/__snapshots__/index.test.ts.snap +3 -1
- package/src/server/routers/async/file.ts +1 -1
- package/src/server/routers/lambda/__tests__/file.test.ts +213 -0
- package/src/server/routers/lambda/file.ts +1 -1
- package/src/server/routers/lambda/importer.ts +4 -2
- package/src/server/routers/lambda/message.ts +2 -2
- package/src/server/routers/lambda/ragEval.ts +1 -1
- package/src/server/routers/lambda/user.ts +2 -2
- package/src/server/services/file/index.ts +46 -0
- package/src/server/utils/tempFileManager.ts +5 -8
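The headline change in this range is the Gemini token-usage work. A minimal sketch of the enriched `usage` event payload, inferred from the updated stream tests further down (the interface name here is illustrative; the real type is `ModelTokensUsage` from `@/types/message`):

```ts
// Illustrative shape only, derived from the expectations in
// google-ai.test.ts / vertex-ai.test.ts below.
interface GeminiUsageEventSketch {
  inputImageTokens?: number;
  inputTextTokens?: number;
  outputReasoningTokens?: number; // new: Gemini "thoughts" token count
  outputTextTokens?: number; // new: visible (non-reasoning) output tokens
  totalInputTokens?: number;
  totalOutputTokens?: number; // text + reasoning
  totalTokens?: number;
}

// Values taken from the new Gemini test fixture (thoughtsTokenCount: 100,
// candidatesTokenCount: 11, promptTokenCount: 19, totalTokenCount: 131):
const usage: GeminiUsageEventSketch = {
  inputTextTokens: 19,
  outputReasoningTokens: 100,
  outputTextTokens: 11,
  totalInputTokens: 19,
  totalOutputTokens: 111,
  totalTokens: 131,
};
```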
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,56 @@
 
 # Changelog
 
+### [Version 1.84.27](https://github.com/lobehub/lobe-chat/compare/v1.84.26...v1.84.27)
+
+<sup>Released on **2025-05-09**</sup>
+
+#### 💄 Styles
+
+- **misc**: Add reasoning tokens and token usage statistics for Google Gemini.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Styles
+
+- **misc**: Add reasoning tokens and token usage statistics for Google Gemini, closes [#7501](https://github.com/lobehub/lobe-chat/issues/7501) ([b466b42](https://github.com/lobehub/lobe-chat/commit/b466b42))
+
+</details>
+
+<div align="right">
+
+[](#readme-top)
+
+</div>
+
+### [Version 1.84.26](https://github.com/lobehub/lobe-chat/compare/v1.84.25...v1.84.26)
+
+<sup>Released on **2025-05-08**</sup>
+
+#### 💄 Styles
+
+- **misc**: Add qwen3 for ollama.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Styles
+
+- **misc**: Add qwen3 for ollama, closes [#7746](https://github.com/lobehub/lobe-chat/issues/7746) ([806d905](https://github.com/lobehub/lobe-chat/commit/806d905))
+
+</details>
+
+<div align="right">
+
+[](#readme-top)
+
+</div>
+
 ### [Version 1.84.25](https://github.com/lobehub/lobe-chat/compare/v1.84.24...v1.84.25)
 
 <sup>Released on **2025-05-08**</sup>
package/changelog/v1.json
CHANGED
@@ -1,4 +1,22 @@
 [
+  {
+    "children": {
+      "improvements": [
+        "Add reasoning tokens and token usage statistics for Google Gemini."
+      ]
+    },
+    "date": "2025-05-09",
+    "version": "1.84.27"
+  },
+  {
+    "children": {
+      "improvements": [
+        "Add qwen3 for ollama."
+      ]
+    },
+    "date": "2025-05-08",
+    "version": "1.84.26"
+  },
   {
     "children": {
       "fixes": [
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lobehub/chat",
-  "version": "1.84.25",
+  "version": "1.84.27",
   "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
   "keywords": [
     "framework",
@@ -121,7 +121,7 @@
   "dependencies": {
     "@ant-design/icons": "^5.6.1",
     "@ant-design/pro-components": "^2.8.7",
-    "@anthropic-ai/sdk": "^0.
+    "@anthropic-ai/sdk": "^0.41.0",
     "@auth/core": "^0.38.0",
     "@aws-sdk/client-bedrock-runtime": "^3.779.0",
     "@aws-sdk/client-s3": "^3.779.0",
@@ -199,7 +199,7 @@
     "langfuse": "^3.37.1",
     "langfuse-core": "^3.37.1",
     "lodash-es": "^4.17.21",
-    "lucide-react": "^0.
+    "lucide-react": "^0.508.0",
     "mammoth": "^1.9.0",
     "mdast-util-to-markdown": "^2.1.2",
     "modern-screenshot": "^4.6.0",
package/src/config/aiModels/ollama.ts
CHANGED
@@ -89,11 +89,22 @@ const ollamaChatModels: AIChatModelCard[] = [
     description:
       'QwQ 是 Qwen 系列的推理模型。与传统的指令调优模型相比,QwQ 具备思考和推理的能力,能够在下游任务中,尤其是困难问题上,显著提升性能。QwQ-32B 是中型推理模型,能够在与最先进的推理模型(如 DeepSeek-R1、o1-mini)竞争时取得可观的表现。',
     displayName: 'QwQ 32B',
-    enabled: true,
     id: 'qwq',
     releasedAt: '2024-11-28',
     type: 'chat',
   },
+  {
+    abilities: {
+      functionCall: true,
+    },
+    contextWindowTokens: 65_536,
+    description: 'Qwen3 是阿里巴巴的新一代大规模语言模型，以优异的性能支持多元化的应用需求。',
+    displayName: 'Qwen3 7B',
+    enabled: true,
+    id: 'qwen3',
+    type: 'chat',
+  },
   {
     contextWindowTokens: 128_000,
     description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型，以优异的性能支持多元化的应用需求。',
@@ -115,7 +126,6 @@ const ollamaChatModels: AIChatModelCard[] = [
     contextWindowTokens: 128_000,
     description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型，以优异的性能支持多元化的应用需求。',
     displayName: 'Qwen2.5 7B',
-    enabled: true,
     id: 'qwen2.5',
     type: 'chat',
   },
package/src/config/aiModels/vertexai.ts
CHANGED
@@ -98,9 +98,9 @@ const vertexaiChatModels: AIChatModelCard[] = [
     type: 'chat',
   },
   {
-    abilities: {
-      functionCall: true,
-      vision: true
+    abilities: {
+      functionCall: true,
+      vision: true
     },
     contextWindowTokens: 1_000_000 + 8192,
     description: 'Gemini 1.5 Flash 002 是一款高效的多模态模型，支持广泛应用的扩展。',
@@ -115,9 +115,9 @@ const vertexaiChatModels: AIChatModelCard[] = [
     type: 'chat',
   },
   {
-    abilities: {
-      functionCall: true,
-      vision: true
+    abilities: {
+      functionCall: true,
+      vision: true
     },
     contextWindowTokens: 2_000_000 + 8192,
     description:
package/src/libs/agent-runtime/google/index.ts
CHANGED
@@ -106,6 +106,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
 
       const contents = await this.buildGoogleMessages(payload.messages);
 
+      const inputStartAt = Date.now();
       const geminiStreamResult = await this.client
         .getGenerativeModel(
           {
@@ -161,7 +162,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
 
       // Convert the response into a friendly text-stream
       const Stream = this.isVertexAi ? VertexAIStream : GoogleGenerativeAIStream;
-      const stream = Stream(prod, options?.callback);
+      const stream = Stream(prod, { callbacks: options?.callback, inputStartAt });
 
       // Respond with the stream
       return StreamingResponse(stream, { headers: options?.headers });
package/src/libs/agent-runtime/utils/streams/google-ai.test.ts
CHANGED
@@ -34,10 +34,12 @@ describe('GoogleGenerativeAIStream', () => {
     const onCompletionMock = vi.fn();
 
     const protocolStream = GoogleGenerativeAIStream(mockGoogleStream, {
-
-
-
-
+      callbacks: {
+        onStart: onStartMock,
+        onText: onTextMock,
+        onToolsCalling: onToolCallMock,
+        onCompletion: onCompletionMock,
+      },
     });
 
     const decoder = new TextDecoder();
@@ -187,7 +189,7 @@ describe('GoogleGenerativeAIStream', () => {
       // usage
       'id: chat_1\n',
       'event: usage\n',
-      `data: {"inputImageTokens":258,"inputTextTokens":8,"totalInputTokens":266,"totalTokens":266}\n\n`,
+      `data: {"inputImageTokens":258,"inputTextTokens":8,"outputTextTokens":0,"totalInputTokens":266,"totalOutputTokens":0,"totalTokens":266}\n\n`,
     ]);
   });
 
@@ -276,7 +278,100 @@ describe('GoogleGenerativeAIStream', () => {
         // usage
         'id: chat_1',
         'event: usage',
-        `data: {"inputTextTokens":19,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
+        `data: {"inputTextTokens":19,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
+      ].map((i) => i + '\n'),
+    );
+  });
+
+  it('should handle stop with content and thought', async () => {
+    vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+    const data = [
+      {
+        candidates: [
+          {
+            content: { parts: [{ text: '234' }], role: 'model' },
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        text: () => '234',
+        usageMetadata: {
+          promptTokenCount: 19,
+          candidatesTokenCount: 3,
+          totalTokenCount: 122,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 19 }],
+          thoughtsTokenCount: 100,
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+      {
+        text: () => '567890\n',
+        candidates: [
+          {
+            content: { parts: [{ text: '567890\n' }], role: 'model' },
+            finishReason: 'STOP',
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        usageMetadata: {
+          promptTokenCount: 19,
+          candidatesTokenCount: 11,
+          totalTokenCount: 131,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 19 }],
+          candidatesTokensDetails: [{ modality: 'TEXT', tokenCount: 11 }],
+          thoughtsTokenCount: 100,
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+    ];
+
+    const mockGoogleStream = new ReadableStream({
+      start(controller) {
+        data.forEach((item) => {
+          controller.enqueue(item);
+        });
+
+        controller.close();
+      },
+    });
+
+    const protocolStream = GoogleGenerativeAIStream(mockGoogleStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual(
+      [
+        'id: chat_1',
+        'event: text',
+        'data: "234"\n',
+
+        'id: chat_1',
+        'event: text',
+        `data: "567890\\n"\n`,
+        // stop
+        'id: chat_1',
+        'event: stop',
+        `data: "STOP"\n`,
+        // usage
+        'id: chat_1',
+        'event: usage',
+        `data: {"inputTextTokens":19,"outputReasoningTokens":100,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":111,"totalTokens":131}\n`,
       ].map((i) => i + '\n'),
     );
   });
package/src/libs/agent-runtime/utils/streams/google-ai.ts
CHANGED
@@ -11,6 +11,7 @@ import {
   StreamToolCallChunkData,
   createCallbacksTransformer,
   createSSEProtocolTransformer,
+  createTokenSpeedCalculator,
   generateToolCallId,
 } from './protocol';
 
@@ -19,31 +20,62 @@ const transformGoogleGenerativeAIStream = (
   context: StreamContext,
 ): StreamProtocolChunk | StreamProtocolChunk[] => {
   // maybe need another structure to add support for multiple choices
+  const candidate = chunk.candidates?.[0];
+  const usage = chunk.usageMetadata;
+  const usageChunks: StreamProtocolChunk[] = [];
+  if (candidate?.finishReason && usage) {
+    const outputReasoningTokens = (usage as any).thoughtsTokenCount || undefined;
+    const totalOutputTokens = (usage.candidatesTokenCount ?? 0) + (outputReasoningTokens ?? 0);
+
+    usageChunks.push(
+      { data: candidate.finishReason, id: context?.id, type: 'stop' },
+      {
+        data: {
+          // TODO: Google SDK 0.24.0 don't have promptTokensDetails types
+          inputImageTokens: (usage as any).promptTokensDetails?.find(
+            (i: any) => i.modality === 'IMAGE',
+          )?.tokenCount,
+          inputTextTokens: (usage as any).promptTokensDetails?.find(
+            (i: any) => i.modality === 'TEXT',
+          )?.tokenCount,
+          outputReasoningTokens,
+          outputTextTokens: totalOutputTokens - (outputReasoningTokens ?? 0),
+          totalInputTokens: usage.promptTokenCount,
+          totalOutputTokens,
+          totalTokens: usage.totalTokenCount,
+        } as ModelTokensUsage,
+        id: context?.id,
+        type: 'usage',
+      },
+    );
+  }
+
   const functionCalls = chunk.functionCalls?.();
 
   if (functionCalls) {
-    return
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    return [
+      {
+        data: functionCalls.map(
+          (value, index): StreamToolCallChunkData => ({
+            function: {
+              arguments: JSON.stringify(value.args),
+              name: value.name,
+            },
+            id: generateToolCallId(index, value.name),
+            index: index,
+            type: 'function',
+          }),
+        ),
+        id: context.id,
+        type: 'tool_calls',
+      },
+      ...usageChunks,
+    ];
   }
 
   const text = chunk.text?.();
 
-  if (
-  const candidate = chunk.candidates[0];
-
+  if (candidate) {
     // return the grounding
     if (candidate.groundingMetadata) {
       const { webSearchQueries, groundingChunks } = candidate.groundingMetadata;
@@ -64,31 +96,15 @@ const transformGoogleGenerativeAIStream = (
           id: context.id,
           type: 'grounding',
         },
+        ...usageChunks,
       ];
     }
 
    if (candidate.finishReason) {
      if (chunk.usageMetadata) {
-        const usage = chunk.usageMetadata;
        return [
          !!text ? { data: text, id: context?.id, type: 'text' } : undefined,
-
-          {
-            data: {
-              // TODO: Google SDK 0.24.0 don't have promptTokensDetails types
-              inputImageTokens: (usage as any).promptTokensDetails?.find(
-                (i: any) => i.modality === 'IMAGE',
-              )?.tokenCount,
-              inputTextTokens: (usage as any).promptTokensDetails?.find(
-                (i: any) => i.modality === 'TEXT',
-              )?.tokenCount,
-              totalInputTokens: usage.promptTokenCount,
-              totalOutputTokens: usage.candidatesTokenCount,
-              totalTokens: usage.totalTokenCount,
-            } as ModelTokensUsage,
-            id: context?.id,
-            type: 'usage',
-          },
+          ...usageChunks,
        ].filter(Boolean) as StreamProtocolChunk[];
      }
      return { data: candidate.finishReason, id: context?.id, type: 'stop' };
@@ -117,13 +133,21 @@ const transformGoogleGenerativeAIStream = (
   };
 };
 
+export interface GoogleAIStreamOptions {
+  callbacks?: ChatStreamCallbacks;
+  inputStartAt?: number;
+}
+
 export const GoogleGenerativeAIStream = (
   rawStream: ReadableStream<EnhancedGenerateContentResponse>,
-  callbacks
+  { callbacks, inputStartAt }: GoogleAIStreamOptions = {},
 ) => {
   const streamStack: StreamContext = { id: 'chat_' + nanoid() };
 
   return rawStream
-    .pipeThrough(
+    .pipeThrough(
+      createTokenSpeedCalculator(transformGoogleGenerativeAIStream, { inputStartAt, streamStack }),
+    )
+    .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
     .pipeThrough(createCallbacksTransformer(callbacks));
 };
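Both stream factories now take an options object rather than a bare callbacks argument. A minimal caller sketch (the `rawStream` variable and the `onText` callback wiring are assumptions for illustration; the shape mirrors the call site in `google/index.ts` above):

```ts
import { GoogleGenerativeAIStream } from './google-ai';

// Capture the request start time so createTokenSpeedCalculator can report
// an output tokens-per-second figure alongside the usage event.
const inputStartAt = Date.now();

const stream = GoogleGenerativeAIStream(rawStream, {
  callbacks: { onText: (text) => console.log(text) },
  inputStartAt,
});
```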
package/src/libs/agent-runtime/utils/streams/protocol.ts
CHANGED
@@ -298,17 +298,37 @@ export const TOKEN_SPEED_CHUNK_ID = 'output_speed';
  */
 export const createTokenSpeedCalculator = (
   transformer: (chunk: any, stack: StreamContext) => StreamProtocolChunk | StreamProtocolChunk[],
-  {
+  { inputStartAt, streamStack }: { inputStartAt?: number; streamStack?: StreamContext } = {},
 ) => {
   let outputStartAt: number | undefined;
+  let outputThinking: boolean | undefined;
 
   const process = (chunk: StreamProtocolChunk) => {
     let result = [chunk];
-    // if the chunk is the first text chunk, set as output start
-    if (!outputStartAt && chunk.type === 'text'
+    // if the chunk is the first text or reasoning chunk, set as output start
+    if (!outputStartAt && (chunk.type === 'text' || chunk.type === 'reasoning')) {
+      outputStartAt = Date.now();
+    }
+
+    /**
+     * 部分 provider 在正式输出 reasoning 前，可能会先输出 content 为空字符串的 chunk，
+     * 其中 reasoning 可能为 null，会导致判断是否输出思考内容错误，所以过滤掉 null 或者空字符串。
+     * 也可能是某些特殊 token，所以不修改 outputStartAt 的逻辑。
+     */
+    if (
+      outputThinking === undefined &&
+      (chunk.type === 'text' || chunk.type === 'reasoning') &&
+      typeof chunk.data === 'string' &&
+      chunk.data.length > 0
+    ) {
+      outputThinking = chunk.type === 'reasoning';
+    }
     // if the chunk is the stop chunk, set as output finish
     if (inputStartAt && outputStartAt && chunk.type === 'usage') {
-      const
+      const totalOutputTokens = chunk.data?.totalOutputTokens || chunk.data?.outputTextTokens;
+      const reasoningTokens = chunk.data?.outputReasoningTokens || 0;
+      const outputTokens =
+        (outputThinking ?? false) ? totalOutputTokens : totalOutputTokens - reasoningTokens;
       result.push({
         data: {
           tps: (outputTokens / (Date.now() - outputStartAt)) * 1000,
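A worked example of the speed arithmetic under the new fields, using the numbers from the Gemini test fixture above (the elapsed time is hypothetical):

```ts
// Sketch of the tokens-per-second computation in createTokenSpeedCalculator,
// not a verbatim copy of the source.
const totalOutputTokens = 111; // from the usage chunk (text + reasoning)
const outputReasoningTokens = 100; // Gemini thoughtsTokenCount
const outputThinking = false; // first non-empty chunk was `text`, not `reasoning`

// When the reply did not stream reasoning content, only the non-reasoning
// tokens count toward the speed figure.
const outputTokens = outputThinking
  ? totalOutputTokens
  : totalOutputTokens - outputReasoningTokens; // 11

const elapsedMs = 2000; // hypothetical Date.now() - outputStartAt
const tps = (outputTokens / elapsedMs) * 1000; // 5.5 tokens per second
```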
package/src/libs/agent-runtime/utils/streams/vertex-ai.test.ts
CHANGED
@@ -103,10 +103,12 @@ describe('VertexAIStream', () => {
     const onCompletionMock = vi.fn();
 
     const protocolStream = VertexAIStream(mockGoogleStream, {
-
-
-
-
+      callbacks: {
+        onStart: onStartMock,
+        onText: onTextMock,
+        onToolsCalling: onToolCallMock,
+        onCompletion: onCompletionMock,
+      },
     });
 
     const decoder = new TextDecoder();
@@ -136,6 +138,7 @@ describe('VertexAIStream', () => {
 
   it('tool_calls', async () => {
     vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
     const rawChunks = [
       {
         candidates: [
@@ -204,10 +207,12 @@ describe('VertexAIStream', () => {
     const onCompletionMock = vi.fn();
 
     const protocolStream = VertexAIStream(mockGoogleStream, {
-
-
-
-
+      callbacks: {
+        onStart: onStartMock,
+        onText: onTextMock,
+        onToolsCalling: onToolCallMock,
+        onCompletion: onCompletionMock,
+      },
     });
 
     const decoder = new TextDecoder();
@@ -223,10 +228,106 @@ describe('VertexAIStream', () => {
       'id: chat_1\n',
       'event: tool_calls\n',
       `data: [{"function":{"arguments":"{\\"city\\":\\"杭州\\"}","name":"realtime-weather____fetchCurrentWeather"},"id":"realtime-weather____fetchCurrentWeather_0","index":0,"type":"function"}]\n\n`,
+      'id: chat_1\n',
+      'event: stop\n',
+      'data: "STOP"\n\n',
+      'id: chat_1\n',
+      'event: usage\n',
+      'data: {"outputTextTokens":9,"totalInputTokens":95,"totalOutputTokens":9,"totalTokens":104}\n\n',
     ]);
 
     expect(onStartMock).toHaveBeenCalledTimes(1);
     expect(onToolCallMock).toHaveBeenCalledTimes(1);
     expect(onCompletionMock).toHaveBeenCalledTimes(1);
   });
+
+  it('should handle stop with content', async () => {
+    vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+    const data = [
+      {
+        candidates: [
+          {
+            content: { parts: [{ text: '234' }], role: 'model' },
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        text: () => '234',
+        usageMetadata: {
+          promptTokenCount: 20,
+          totalTokenCount: 20,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 20 }],
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+      {
+        text: () => '567890\n',
+        candidates: [
+          {
+            content: { parts: [{ text: '567890\n' }], role: 'model' },
+            finishReason: 'STOP',
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        usageMetadata: {
+          promptTokenCount: 19,
+          candidatesTokenCount: 11,
+          totalTokenCount: 30,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 19 }],
+          candidatesTokensDetails: [{ modality: 'TEXT', tokenCount: 11 }],
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+    ];
+
+    const mockGoogleStream = new ReadableStream({
+      start(controller) {
+        data.forEach((item) => {
+          controller.enqueue(item);
+        });
+
+        controller.close();
+      },
+    });
+
+    const protocolStream = VertexAIStream(mockGoogleStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual(
+      [
+        'id: chat_1',
+        'event: text',
+        'data: "234"\n',
+
+        'id: chat_1',
+        'event: text',
+        `data: "567890\\n"\n`,
+        // stop
+        'id: chat_1',
+        'event: stop',
+        `data: "STOP"\n`,
+        // usage
+        'id: chat_1',
+        'event: usage',
+        `data: {"inputTextTokens":19,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
+      ].map((i) => i + '\n'),
+    );
+  });
 });
package/src/libs/agent-runtime/utils/streams/vertex-ai.ts
CHANGED
@@ -1,27 +1,58 @@
 import { EnhancedGenerateContentResponse, GenerateContentResponse } from '@google/generative-ai';
 
+import { ModelTokensUsage } from '@/types/message';
 import { nanoid } from '@/utils/uuid';
 
-import {
+import { type GoogleAIStreamOptions } from './google-ai';
 import {
   StreamContext,
   StreamProtocolChunk,
   createCallbacksTransformer,
   createSSEProtocolTransformer,
+  createTokenSpeedCalculator,
   generateToolCallId,
 } from './protocol';
 
 const transformVertexAIStream = (
   chunk: GenerateContentResponse,
-
-): StreamProtocolChunk => {
+  context: StreamContext,
+): StreamProtocolChunk | StreamProtocolChunk[] => {
   // maybe need another structure to add support for multiple choices
-  const
+  const candidate = chunk.candidates?.[0];
+  const usage = chunk.usageMetadata;
+  const usageChunks: StreamProtocolChunk[] = [];
+  if (candidate?.finishReason && usage) {
+    const outputReasoningTokens = (usage as any).thoughtsTokenCount || undefined;
+    const totalOutputTokens = (usage.candidatesTokenCount ?? 0) + (outputReasoningTokens ?? 0);
+
+    usageChunks.push(
+      { data: candidate.finishReason, id: context?.id, type: 'stop' },
+      {
+        data: {
+          // TODO: Google SDK 0.24.0 don't have promptTokensDetails types
+          inputImageTokens: (usage as any).promptTokensDetails?.find(
+            (i: any) => i.modality === 'IMAGE',
+          )?.tokenCount,
+          inputTextTokens: (usage as any).promptTokensDetails?.find(
+            (i: any) => i.modality === 'TEXT',
+          )?.tokenCount,
+          outputReasoningTokens,
+          outputTextTokens: totalOutputTokens - (outputReasoningTokens ?? 0),
+          totalInputTokens: usage.promptTokenCount,
+          totalOutputTokens,
+          totalTokens: usage.totalTokenCount,
+        } as ModelTokensUsage,
+        id: context?.id,
+        type: 'usage',
+      },
+    );
+  }
 
+  const candidates = chunk.candidates;
   if (!candidates)
     return {
       data: '',
-      id:
+      id: context?.id,
       type: 'text',
     };
 
@@ -32,44 +63,58 @@ const transformVertexAIStream = (
     if (part.functionCall) {
       const functionCall = part.functionCall;
 
-      return
-
-
-
-
-
+      return [
+        {
+          data: [
+            {
+              function: {
+                arguments: JSON.stringify(functionCall.args),
+                name: functionCall.name,
+              },
+              id: generateToolCallId(0, functionCall.name),
+              index: 0,
+              type: 'function',
             },
-
-
-
-
-
-
-
-
+          ],
+          id: context?.id,
+          type: 'tool_calls',
+        },
+        ...usageChunks,
+      ];
+    }
+
+    if (item.finishReason) {
+      if (chunk.usageMetadata) {
+        return [
+          !!part.text ? { data: part.text, id: context?.id, type: 'text' } : undefined,
+          ...usageChunks,
+        ].filter(Boolean) as StreamProtocolChunk[];
+      }
+      return { data: item.finishReason, id: context?.id, type: 'stop' };
    }
 
    return {
      data: part.text,
-      id:
+      id: context?.id,
      type: 'text',
    };
  }
 
  return {
    data: '',
-    id:
+    id: context?.id,
    type: 'stop',
  };
 };
 
 export const VertexAIStream = (
   rawStream: ReadableStream<EnhancedGenerateContentResponse>,
-  callbacks
+  { callbacks, inputStartAt }: GoogleAIStreamOptions = {},
 ) => {
   const streamStack: StreamContext = { id: 'chat_' + nanoid() };
 
   return rawStream
-    .pipeThrough(
+    .pipeThrough(createTokenSpeedCalculator(transformVertexAIStream, { inputStartAt, streamStack }))
+    .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
     .pipeThrough(createCallbacksTransformer(callbacks));
 };
package/src/libs/langchain/loaders/epub/index.ts
CHANGED
@@ -2,13 +2,15 @@ import { EPubLoader as Loader } from '@langchain/community/document_loaders/fs/epub';
 import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 
 import { TempFileManager } from '@/server/utils/tempFileManager';
+import { nanoid } from '@/utils/uuid';
 
 import { loaderConfig } from '../config';
 
 export const EPubLoader = async (content: Uint8Array) => {
-  const tempManager = new TempFileManager();
+  const tempManager = new TempFileManager('epub-');
+
   try {
-    const tempPath = await tempManager.writeTempFile(content);
+    const tempPath = await tempManager.writeTempFile(content, `${nanoid()}.epub`);
     const loader = new Loader(tempPath);
     const documents = await loader.load();
 
package/src/libs/mcp/__tests__/__snapshots__/index.test.ts.snap
CHANGED
@@ -21,9 +21,11 @@ exports[`MCPClient > Stdio Transport > should list tools via stdio 1`] = `
     "name": "echo",
   },
   {
-    "annotations": {},
     "description": "Lists all available tools and methods",
     "inputSchema": {
+      "$schema": "http://json-schema.org/draft-07/schema#",
+      "additionalProperties": false,
+      "properties": {},
       "type": "object",
     },
     "name": "debug",
package/src/server/routers/async/file.ts
CHANGED
@@ -35,7 +35,7 @@ const fileProcedure = asyncAuthedProcedure.use(async (opts) => {
       chunkService: new ChunkService(ctx.userId),
       embeddingModel: new EmbeddingModel(ctx.serverDB, ctx.userId),
       fileModel: new FileModel(ctx.serverDB, ctx.userId),
-      fileService: new FileService(),
+      fileService: new FileService(ctx.serverDB, ctx.userId),
     },
   });
 });
package/src/server/routers/lambda/__tests__/file.test.ts
ADDED
@@ -0,0 +1,213 @@
+import { TRPCError } from '@trpc/server';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { fileRouter } from '@/server/routers/lambda/file';
+import { AsyncTaskStatus } from '@/types/asyncTask';
+
+// Patch: Use actual router context middleware to inject the correct models/services
+function createCallerWithCtx(partialCtx: any = {}) {
+  // All mocks are spies
+  const fileModel = {
+    checkHash: vi.fn().mockResolvedValue({ isExist: true }),
+    create: vi.fn().mockResolvedValue({ id: 'test-id' }),
+    findById: vi.fn().mockResolvedValue(undefined),
+    query: vi.fn().mockResolvedValue([]),
+    delete: vi.fn().mockResolvedValue(undefined),
+    deleteMany: vi.fn().mockResolvedValue([]),
+    clear: vi.fn().mockResolvedValue({} as any),
+  };
+
+  const fileService = {
+    getFullFileUrl: vi.fn().mockResolvedValue('full-url'),
+    deleteFile: vi.fn().mockResolvedValue(undefined),
+    deleteFiles: vi.fn().mockResolvedValue(undefined),
+  };
+
+  const chunkModel = {
+    countByFileIds: vi.fn().mockResolvedValue([{ id: 'test-id', count: 5 }]),
+    countByFileId: vi.fn().mockResolvedValue(5),
+  };
+
+  const asyncTaskModel = {
+    findByIds: vi.fn().mockResolvedValue([
+      {
+        id: 'test-task-id',
+        status: AsyncTaskStatus.Success,
+      },
+    ]),
+    findById: vi.fn(),
+    delete: vi.fn(),
+  };
+
+  const ctx = {
+    serverDB: {} as any,
+    userId: 'test-user',
+    asyncTaskModel,
+    chunkModel,
+    fileModel,
+    fileService,
+    ...partialCtx,
+  };
+
+  return { ctx, caller: fileRouter.createCaller(ctx) };
+}
+
+vi.mock('@/config/db', () => ({
+  serverDBEnv: {
+    REMOVE_GLOBAL_FILE: false,
+  },
+}));
+
+vi.mock('@/database/models/asyncTask', () => ({
+  AsyncTaskModel: vi.fn(() => ({
+    findById: vi.fn(),
+    findByIds: vi.fn(),
+    delete: vi.fn(),
+  })),
+}));
+
+vi.mock('@/database/models/chunk', () => ({
+  ChunkModel: vi.fn(() => ({
+    countByFileId: vi.fn(),
+    countByFileIds: vi.fn(),
+  })),
+}));
+
+vi.mock('@/database/models/file', () => ({
+  FileModel: vi.fn(() => ({
+    checkHash: vi.fn(),
+    create: vi.fn(),
+    delete: vi.fn(),
+    deleteMany: vi.fn(),
+    findById: vi.fn(),
+    query: vi.fn(),
+    clear: vi.fn(),
+  })),
+}));
+
+vi.mock('@/server/services/file', () => ({
+  FileService: vi.fn(() => ({
+    getFullFileUrl: vi.fn(),
+    deleteFile: vi.fn(),
+    deleteFiles: vi.fn(),
+  })),
+}));
+
+describe('fileRouter', () => {
+  let ctx: any;
+  let caller: any;
+  let mockFile: any;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+
+    mockFile = {
+      id: 'test-id',
+      name: 'test.txt',
+      url: 'test-url',
+      createdAt: new Date(),
+      updatedAt: new Date(),
+      accessedAt: new Date(),
+      userId: 'test-user',
+      size: 100,
+      fileType: 'text',
+      metadata: {},
+      fileHash: null,
+      clientId: null,
+      chunkTaskId: null,
+      embeddingTaskId: null,
+    };
+
+    // Use actual context with default mocks
+    ({ ctx, caller } = createCallerWithCtx());
+  });
+
+  describe('checkFileHash', () => {
+    it('should handle when fileModel.checkHash returns undefined', async () => {
+      ctx.fileModel.checkHash.mockResolvedValue(undefined);
+      await expect(caller.checkFileHash({ hash: 'test-hash' })).resolves.toBeUndefined();
+    });
+  });
+
+  describe('createFile', () => {
+    it('should throw if fileModel.checkHash returns undefined', async () => {
+      ctx.fileModel.checkHash.mockResolvedValue(undefined);
+      await expect(
+        caller.createFile({
+          hash: 'test-hash',
+          fileType: 'text',
+          name: 'test.txt',
+          size: 100,
+          url: 'test-url',
+          metadata: {},
+        }),
+      ).rejects.toThrow();
+    });
+  });
+
+  describe('findById', () => {
+    it('should throw error when file not found', async () => {
+      ctx.fileModel.findById.mockResolvedValue(null);
+
+      await expect(caller.findById({ id: 'invalid-id' })).rejects.toThrow(TRPCError);
+    });
+  });
+
+  describe('getFileItemById', () => {
+    it('should throw error when file not found', async () => {
+      ctx.fileModel.findById.mockResolvedValue(null);
+
+      await expect(caller.getFileItemById({ id: 'invalid-id' })).rejects.toThrow(TRPCError);
+    });
+  });
+
+  describe('getFiles', () => {
+    it('should handle fileModel.query returning undefined', async () => {
+      ctx.fileModel.query.mockResolvedValue(undefined);
+
+      await expect(caller.getFiles({})).rejects.toThrow();
+    });
+  });
+
+  describe('removeFile', () => {
+    it('should do nothing when file not found', async () => {
+      ctx.fileModel.delete.mockResolvedValue(null);
+
+      await caller.removeFile({ id: 'invalid-id' });
+
+      expect(ctx.fileService.deleteFile).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('removeFiles', () => {
+    it('should do nothing when no files found', async () => {
+      ctx.fileModel.deleteMany.mockResolvedValue([]);
+
+      await caller.removeFiles({ ids: ['invalid-1', 'invalid-2'] });
+
+      expect(ctx.fileService.deleteFiles).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('removeFileAsyncTask', () => {
+    it('should do nothing when file not found', async () => {
+      ctx.fileModel.findById.mockResolvedValue(null);
+
+      await caller.removeFileAsyncTask({ id: 'test-id', type: 'chunk' });
+
+      expect(ctx.asyncTaskModel.delete).not.toHaveBeenCalled();
+    });
+
+    it('should do nothing when task id is missing', async () => {
+      ctx.fileModel.findById.mockResolvedValue(mockFile);
+
+      await caller.removeFileAsyncTask({ id: 'test-id', type: 'embedding' });
+
+      expect(ctx.asyncTaskModel.delete).not.toHaveBeenCalled();
+
+      await caller.removeFileAsyncTask({ id: 'test-id', type: 'chunk' });
+
+      expect(ctx.asyncTaskModel.delete).not.toHaveBeenCalled();
+    });
+  });
+});
package/src/server/routers/lambda/file.ts
CHANGED
@@ -19,7 +19,7 @@ const fileProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
       asyncTaskModel: new AsyncTaskModel(ctx.serverDB, ctx.userId),
       chunkModel: new ChunkModel(ctx.serverDB, ctx.userId),
       fileModel: new FileModel(ctx.serverDB, ctx.userId),
-      fileService: new FileService(),
+      fileService: new FileService(ctx.serverDB, ctx.userId),
     },
   });
 });
package/src/server/routers/lambda/importer.ts
CHANGED
@@ -10,10 +10,12 @@ import { ImportResultData, ImporterEntryData } from '@/types/importer';
 
 const importProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
   const { ctx } = opts;
-  const dataImporterService = new DataImporterRepos(ctx.serverDB, ctx.userId);
 
   return opts.next({
-    ctx: {
+    ctx: {
+      dataImporterService: new DataImporterRepos(ctx.serverDB, ctx.userId),
+      fileService: new FileService(ctx.serverDB, ctx.userId),
+    },
   });
 });
 
package/src/server/routers/lambda/message.ts
CHANGED
@@ -16,7 +16,7 @@ const messageProcedure = authedProcedure.use(serverDatabase).use(async (opts) =>
 
   return opts.next({
     ctx: {
-      fileService: new FileService(),
+      fileService: new FileService(ctx.serverDB, ctx.userId),
       messageModel: new MessageModel(ctx.serverDB, ctx.userId),
     },
   });
@@ -102,7 +102,7 @@ export const messageRouter = router({
       const serverDB = await getServerDB();
 
       const messageModel = new MessageModel(serverDB, ctx.userId);
-      const fileService = new FileService();
+      const fileService = new FileService(serverDB, ctx.userId);
 
       return messageModel.query(input, {
         postProcessUrl: (path) => fileService.getFullFileUrl(path),
package/src/server/routers/lambda/ragEval.ts
CHANGED
@@ -40,7 +40,7 @@ const ragEvalProcedure = authedProcedure
       datasetRecordModel: new EvalDatasetRecordModel(ctx.userId),
       evaluationModel: new EvalEvaluationModel(ctx.userId),
       evaluationRecordModel: new EvaluationRecordModel(ctx.userId),
-      fileService: new FileService(),
+      fileService: new FileService(ctx.serverDB, ctx.userId),
     },
   });
 });
package/src/server/routers/lambda/user.ts
CHANGED
@@ -1,6 +1,6 @@
 import { UserJSON } from '@clerk/backend';
+import { v4 as uuidv4 } from 'uuid';
 import { z } from 'zod';
-import { v4 as uuidv4 } from 'uuid'; // 需要添加此导入
 
 import { enableClerk } from '@/const/auth';
 import { isDesktop } from '@/const/version';
@@ -28,7 +28,7 @@ const userProcedure = authedProcedure.use(serverDatabase).use(async ({ ctx, next
   return next({
     ctx: {
       clerkAuth: new ClerkAuth(),
-      fileService: new FileService(),
+      fileService: new FileService(ctx.serverDB, ctx.userId),
       nextAuthDbAdapter: LobeNextAuthDbAdapter(ctx.serverDB),
       userModel: new UserModel(ctx.serverDB, ctx.userId),
     },
package/src/server/services/file/index.ts
CHANGED
@@ -1,3 +1,12 @@
+import { TRPCError } from '@trpc/server';
+
+import { serverDBEnv } from '@/config/db';
+import { FileModel } from '@/database/models/file';
+import { FileItem } from '@/database/schemas';
+import { LobeChatDatabase } from '@/database/type';
+import { TempFileManager } from '@/server/utils/tempFileManager';
+import { nanoid } from '@/utils/uuid';
+
 import { FileServiceImpl, createFileServiceModule } from './impls';
 
 /**
@@ -5,8 +14,16 @@ import { FileServiceImpl, createFileServiceModule } from './impls';
  * 使用模块化实现方式，提供文件操作服务
  */
 export class FileService {
+  private userId: string;
+  private fileModel: FileModel;
+
   private impl: FileServiceImpl = createFileServiceModule();
 
+  constructor(db: LobeChatDatabase, userId: string) {
+    this.userId = userId;
+    this.fileModel = new FileModel(db, userId);
+  }
+
   /**
    * 删除文件
    */
@@ -62,4 +79,33 @@ export class FileService {
   public async getFullFileUrl(url?: string | null, expiresIn?: number): Promise<string> {
     return this.impl.getFullFileUrl(url, expiresIn);
   }
+
+  async downloadFileToLocal(
+    fileId: string,
+  ): Promise<{ cleanup: () => void; file: FileItem; filePath: string }> {
+    const file = await this.fileModel.findById(fileId);
+    if (!file) {
+      throw new TRPCError({ code: 'BAD_REQUEST', message: 'File not found' });
+    }
+
+    let content: Uint8Array | undefined;
+    try {
+      content = await this.getFileByteArray(file.url);
+    } catch (e) {
+      console.error(e);
+      // if file not found, delete it from db
+      if ((e as any).Code === 'NoSuchKey') {
+        await this.fileModel.delete(fileId, serverDBEnv.REMOVE_GLOBAL_FILE);
+        throw new TRPCError({ code: 'BAD_REQUEST', message: 'File not found' });
+      }
+    }
+
+    if (!content) throw new TRPCError({ code: 'BAD_REQUEST', message: 'File content is empty' });
+
+    const dir = nanoid();
+    const tempManager = new TempFileManager(dir);
+
+    const filePath = await tempManager.writeTempFile(content, file.name);
+    return { cleanup: () => tempManager.cleanup(), file, filePath };
+  }
 }
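The new `downloadFileToLocal` helper pairs a temp-file path with a cleanup handle. A minimal caller sketch (the tRPC context, `fileId`, and `processLocalFile` are placeholders for illustration, not part of the package):

```ts
const fileService = new FileService(ctx.serverDB, ctx.userId);

const { file, filePath, cleanup } = await fileService.downloadFileToLocal(fileId);
try {
  // filePath points at a copy of `file.name` written into a per-call temp directory
  await processLocalFile(filePath);
} finally {
  cleanup(); // removes the temp directory created by TempFileManager
}
```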
package/src/server/utils/tempFileManager.ts
CHANGED
@@ -1,7 +1,6 @@
 import { existsSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
-import { v4 as uuidv4 } from 'uuid';
 
 /**
  * 安全存储临时文件工具类
@@ -10,21 +9,19 @@ export class TempFileManager {
   private readonly tempDir: string;
   private filePaths: Set<string> = new Set();
 
-  constructor() {
+  constructor(dirname: string) {
     // 创建唯一临时目录 (跨平台安全)
-    this.tempDir = mkdtempSync(join(tmpdir(),
+    this.tempDir = mkdtempSync(join(tmpdir(), dirname));
     // 注册退出清理钩子
     this.registerCleanupHook();
   }
 
   /**
    * 将 Uint8Array 写入临时文件
-
-   * @param ext 文件扩展名 (默认 .epub)
-   * @returns 临时文件绝对路径
+
    */
-  async writeTempFile(data: Uint8Array,
-    const filePath = join(this.tempDir,
+  async writeTempFile(data: Uint8Array, name: string): Promise<string> {
+    const filePath = join(this.tempDir, name);
 
     try {
       writeFileSync(filePath, data);