@lobehub/chat 1.116.4 → 1.117.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- package/CHANGELOG.md +92 -0
- package/changelog/v1.json +12 -0
- package/locales/ar/models.json +3 -0
- package/locales/bg-BG/models.json +3 -0
- package/locales/de-DE/models.json +3 -0
- package/locales/en-US/models.json +3 -0
- package/locales/es-ES/models.json +3 -0
- package/locales/fa-IR/models.json +3 -0
- package/locales/fr-FR/models.json +3 -0
- package/locales/it-IT/models.json +3 -0
- package/locales/ja-JP/models.json +3 -0
- package/locales/ko-KR/models.json +3 -0
- package/locales/nl-NL/models.json +3 -0
- package/locales/pl-PL/models.json +3 -0
- package/locales/pt-BR/models.json +3 -0
- package/locales/ru-RU/models.json +3 -0
- package/locales/tr-TR/models.json +3 -0
- package/locales/vi-VN/models.json +3 -0
- package/locales/zh-CN/models.json +3 -0
- package/locales/zh-TW/models.json +3 -0
- package/package.json +1 -1
- package/packages/const/src/image.ts +9 -0
- package/packages/database/vitest.config.mts +1 -0
- package/packages/database/vitest.config.server.mts +1 -0
- package/packages/file-loaders/package.json +1 -1
- package/packages/model-runtime/src/RouterRuntime/createRuntime.ts +11 -9
- package/packages/model-runtime/src/google/createImage.test.ts +657 -0
- package/packages/model-runtime/src/google/createImage.ts +152 -0
- package/packages/model-runtime/src/google/index.test.ts +0 -328
- package/packages/model-runtime/src/google/index.ts +3 -40
- package/packages/model-runtime/src/utils/modelParse.ts +2 -1
- package/packages/model-runtime/src/utils/openaiCompatibleFactory/createImage.ts +239 -0
- package/packages/model-runtime/src/utils/openaiCompatibleFactory/index.test.ts +22 -22
- package/packages/model-runtime/src/utils/openaiCompatibleFactory/index.ts +9 -116
- package/packages/model-runtime/src/utils/postProcessModelList.ts +55 -0
- package/packages/model-runtime/src/utils/streams/google-ai.test.ts +7 -7
- package/packages/model-runtime/src/utils/streams/google-ai.ts +15 -2
- package/packages/model-runtime/src/utils/streams/openai/openai.test.ts +41 -0
- package/packages/model-runtime/src/utils/streams/openai/openai.ts +38 -2
- package/packages/model-runtime/src/utils/streams/protocol.test.ts +32 -0
- package/packages/model-runtime/src/utils/streams/protocol.ts +7 -3
- package/packages/model-runtime/src/utils/usageConverter.test.ts +58 -0
- package/packages/model-runtime/src/utils/usageConverter.ts +5 -1
- package/packages/utils/vitest.config.mts +1 -0
- package/src/config/aiModels/google.ts +42 -22
- package/src/config/aiModels/openrouter.ts +33 -0
- package/src/config/aiModels/vertexai.ts +4 -4
- package/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx +6 -0
- package/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts +38 -0
- package/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts +13 -1
- package/src/locales/default/chat.ts +1 -0
- package/packages/model-runtime/src/UniformRuntime/index.ts +0 -117
package/packages/model-runtime/src/utils/openaiCompatibleFactory/createImage.ts (new file)
@@ -0,0 +1,239 @@
+import { imageUrlToBase64 } from '@lobechat/utils';
+import createDebug from 'debug';
+import OpenAI from 'openai';
+
+import { RuntimeImageGenParamsValue } from '@/libs/standard-parameters/index';
+
+import { CreateImagePayload, CreateImageResponse } from '../../types/image';
+import { convertImageUrlToFile } from '../openaiHelpers';
+import { parseDataUri } from '../uriParser';
+
+const log = createDebug('lobe-image:openai-compatible');
+
+/**
+ * Generate images using traditional OpenAI images API (DALL-E, etc.)
+ */
+async function generateByImageMode(
+  client: OpenAI,
+  payload: CreateImagePayload,
+): Promise<CreateImageResponse> {
+  const { model, params } = payload;
+
+  log('Creating image with model: %s and params: %O', model, params);
+
+  // Map parameter names, mapping imageUrls to image
+  const paramsMap = new Map<RuntimeImageGenParamsValue, string>([
+    ['imageUrls', 'image'],
+    ['imageUrl', 'image'],
+  ]);
+  const userInput: Record<string, any> = Object.fromEntries(
+    Object.entries(params).map(([key, value]) => [
+      paramsMap.get(key as RuntimeImageGenParamsValue) ?? key,
+      value,
+    ]),
+  );
+
+  // https://platform.openai.com/docs/api-reference/images/createEdit
+  const isImageEdit = Array.isArray(userInput.image) && userInput.image.length > 0;
+  // If there are imageUrls parameters, convert them to File objects
+  if (isImageEdit) {
+    log('Converting imageUrls to File objects: %O', userInput.image);
+    try {
+      // Convert all image URLs to File objects
+      const imageFiles = await Promise.all(
+        userInput.image.map((url: string) => convertImageUrlToFile(url)),
+      );
+
+      log('Successfully converted %d images to File objects', imageFiles.length);
+
+      // According to official docs, if there are multiple images, pass an array; if only one, pass a single File
+      userInput.image = imageFiles.length === 1 ? imageFiles[0] : imageFiles;
+    } catch (error) {
+      log('Error converting imageUrls to File objects: %O', error);
+      throw new Error(`Failed to convert image URLs to File objects: ${error}`);
+    }
+  } else {
+    delete userInput.image;
+  }
+
+  if (userInput.size === 'auto') {
+    delete userInput.size;
+  }
+
+  const defaultInput = {
+    n: 1,
+    ...(model.includes('dall-e') ? { response_format: 'b64_json' } : {}),
+    ...(isImageEdit ? { input_fidelity: 'high' } : {}),
+  };
+
+  const options = {
+    model,
+    ...defaultInput,
+    ...userInput,
+  };
+
+  log('options: %O', options);
+
+  // Determine if it's an image editing operation
+  const img = isImageEdit
+    ? await client.images.edit(options as any)
+    : await client.images.generate(options as any);
+
+  // Check the integrity of response data
+  if (!img || !img.data || !Array.isArray(img.data) || img.data.length === 0) {
+    log('Invalid image response: missing data array');
+    throw new Error('Invalid image response: missing or empty data array');
+  }
+
+  const imageData = img.data[0];
+  if (!imageData) {
+    log('Invalid image response: first data item is null/undefined');
+    throw new Error('Invalid image response: first data item is null or undefined');
+  }
+
+  let imageUrl: string;
+
+  // Handle base64 format response
+  if (imageData.b64_json) {
+    // Determine the image's MIME type, default to PNG
+    const mimeType = 'image/png'; // OpenAI image generation defaults to PNG format
+
+    // Convert base64 string to complete data URL
+    imageUrl = `data:${mimeType};base64,${imageData.b64_json}`;
+    log('Successfully converted base64 to data URL, length: %d', imageUrl.length);
+  }
+  // Handle URL format response
+  else if (imageData.url) {
+    imageUrl = imageData.url;
+    log('Using direct image URL: %s', imageUrl);
+  }
+  // If neither format exists, throw error
+  else {
+    log('Invalid image response: missing both b64_json and url fields');
+    throw new Error('Invalid image response: missing both b64_json and url fields');
+  }
+
+  return {
+    imageUrl,
+  };
+}
+
+/**
+ * Process image URL for chat model input
+ */
+async function processImageUrlForChat(imageUrl: string): Promise<string> {
+  const { type, base64, mimeType } = parseDataUri(imageUrl);
+
+  if (type === 'base64') {
+    if (!base64) {
+      throw new TypeError("Image URL doesn't contain base64 data");
+    }
+    return `data:${mimeType || 'image/png'};base64,${base64}`;
+  } else if (type === 'url') {
+    // For URL type, convert to base64 first
+    const { base64: urlBase64, mimeType: urlMimeType } = await imageUrlToBase64(imageUrl);
+    return `data:${urlMimeType};base64,${urlBase64}`;
+  } else {
+    throw new TypeError(`Currently we don't support image url: ${imageUrl}`);
+  }
+}
+
+/**
+ * Generate images using chat completion API (OpenRouter Gemini, etc.)
+ */
+async function generateByChatModel(
+  client: OpenAI,
+  payload: CreateImagePayload,
+): Promise<CreateImageResponse> {
+  const { model, params } = payload;
+  const actualModel = model.replace(':image', ''); // Remove :image suffix
+
+  log('Creating image via chat API with model: %s and params: %O', actualModel, params);
+
+  // Build message content array
+  const content: Array<any> = [
+    {
+      text: params.prompt,
+      type: 'text',
+    },
+  ];
+
+  // Add image for editing mode if provided
+  if (params.imageUrl && params.imageUrl !== null) {
+    log('Processing image URL for editing mode: %s', params.imageUrl);
+    try {
+      const processedImageUrl = await processImageUrlForChat(params.imageUrl);
+      content.push({
+        image_url: {
+          url: processedImageUrl,
+        },
+        type: 'image_url',
+      });
+      log('Successfully processed image URL for chat input');
+    } catch (error) {
+      log('Error processing image URL: %O', error);
+      throw new Error(`Failed to process image URL: ${error}`);
+    }
+  }
+
+  // Call chat completion API
+  const response = await client.chat.completions.create({
+    messages: [
+      {
+        content,
+        role: 'user',
+      },
+    ],
+    model: actualModel,
+    stream: false,
+  });
+
+  log('Chat API response: %O', response);
+
+  // Extract image from response
+  const message = response.choices[0]?.message;
+  if (!message) {
+    throw new Error('No message in chat completion response');
+  }
+
+  // Check if response has images in the expected format
+  if ((message as any).images && Array.isArray((message as any).images)) {
+    const { images } = message as any;
+    if (images.length > 0) {
+      const image = images[0];
+      if (image.image_url?.url) {
+        log('Successfully extracted image from chat response');
+        return { imageUrl: image.image_url.url };
+      }
+    }
+  }
+
+  // If no images found, throw error
+  log('No images found in chat completion response');
+  throw new Error('No image generated in chat completion response');
+}
+
+/**
+ * Create image using OpenAI Compatible API
+ */
+export async function createOpenAICompatibleImage(
+  client: OpenAI,
+  payload: CreateImagePayload,
+  _provider: string, // eslint-disable-line @typescript-eslint/no-unused-vars
+): Promise<CreateImageResponse> {
+  try {
+    const { model } = payload;
+
+    // Check if it's a chat model for image generation (via :image suffix)
+    if (model.endsWith(':image')) {
+      return await generateByChatModel(client, payload);
+    }
+
+    // Default to traditional images API
+    return await generateByImageMode(client, payload);
+  } catch (error) {
+    const err = error as Error;
+    log('Error in createImage: %O', err);
+    throw err;
+  }
+}
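For orientation, a minimal usage sketch of the new export (not part of the diff; the client options and model ids are illustrative, and strict payload typing is elided): payloads whose model id carries the `:image` suffix are routed through the chat-completions path, everything else through the classic images API.

import OpenAI from 'openai';

import { createOpenAICompatibleImage } from './createImage';

async function demo() {
  // Hypothetical OpenAI-compatible client (e.g. OpenRouter); key and baseURL are placeholders.
  const client = new OpenAI({ apiKey: 'sk-...', baseURL: 'https://openrouter.ai/api/v1' });

  // ':image' suffix -> generateByChatModel: the suffix is stripped, the prompt is sent
  // as a chat message, and the image is read from message.images[0].image_url.url.
  const viaChat = await createOpenAICompatibleImage(
    client,
    { model: 'google/gemini-2.5-flash-image-preview:image', params: { prompt: 'a red fox' } } as any,
    'openrouter',
  );

  // Plain model id -> generateByImageMode: classic images.generate / images.edit call.
  const viaImagesApi = await createOpenAICompatibleImage(
    client,
    { model: 'dall-e-3', params: { prompt: 'a red fox' } } as any,
    'openai',
  );

  console.log(viaChat.imageUrl, viaImagesApi.imageUrl);
}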
package/packages/model-runtime/src/utils/openaiCompatibleFactory/index.test.ts
@@ -45,7 +45,7 @@ const LobeMockProvider = createOpenAICompatibleRuntime({
   beforeEach(() => {
     instance = new LobeMockProvider({ apiKey: 'test' });
 
-    //
+    // Use vi.spyOn to mock the chat.completions.create method
     vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
       new ReadableStream() as any,
     );
@@ -540,16 +540,16 @@ describe('LobeOpenAICompatibleFactory', () => {
        { signal: controller.signal },
      );
 
-      //
+      // Give some time for the request to start
      await sleep(50);
 
      controller.abort();
 
-      //
-      //
+      // Wait and assert that Promise is rejected
+      // Use try-catch to capture and verify errors
      try {
        await chatPromise;
-        //
+        // If Promise is not rejected, test should fail
        expect.fail('Expected promise to be rejected');
      } catch (error) {
        expect((error as any).errorType).toBe('AgentRuntimeError');
@@ -753,7 +753,7 @@ describe('LobeOpenAICompatibleFactory', () => {
 
  describe('chat with callback and headers', () => {
    it('should handle callback and headers correctly', async () => {
-      //
+      // Mock chat.completions.create method to return a readable stream
      const mockCreateMethod = vi
        .spyOn(instance['client'].chat.completions, 'create')
        .mockResolvedValue(
@@ -774,14 +774,14 @@ describe('LobeOpenAICompatibleFactory', () => {
        }) as any,
      );
 
-      //
+      // Prepare callback and headers
      const mockCallback: ChatStreamCallbacks = {
        onStart: vi.fn(),
        onCompletion: vi.fn(),
      };
      const mockHeaders = { 'Custom-Header': 'TestValue' };
 
-      //
+      // Execute test
      const result = await instance.chat(
        {
          messages: [{ content: 'Hello', role: 'user' }],
@@ -791,17 +791,17 @@ describe('LobeOpenAICompatibleFactory', () => {
        { callback: mockCallback, headers: mockHeaders },
      );
 
-      //
-      await result.text(); //
+      // Verify callback is called
+      await result.text(); // Ensure stream is consumed
      expect(mockCallback.onStart).toHaveBeenCalled();
      expect(mockCallback.onCompletion).toHaveBeenCalledWith({
        text: 'hello',
      });
 
-      //
+      // Verify headers are correctly passed
      expect(result.headers.get('Custom-Header')).toEqual('TestValue');
 
-      //
+      // Cleanup
      mockCreateMethod.mockRestore();
    });
  });
@@ -940,40 +940,40 @@ describe('LobeOpenAICompatibleFactory', () => {
  describe('DEBUG', () => {
    it('should call debugStream and return StreamingTextResponse when DEBUG_OPENROUTER_CHAT_COMPLETION is 1', async () => {
      // Arrange
-      const mockProdStream = new ReadableStream() as any; //
+      const mockProdStream = new ReadableStream() as any; // Mocked prod stream
      const mockDebugStream = new ReadableStream({
        start(controller) {
          controller.enqueue('Debug stream content');
          controller.close();
        },
      }) as any;
-      mockDebugStream.toReadableStream = () => mockDebugStream; //
+      mockDebugStream.toReadableStream = () => mockDebugStream; // Add toReadableStream method
 
-      //
+      // Mock chat.completions.create return value, including mocked tee method
      (instance['client'].chat.completions.create as Mock).mockResolvedValue({
        tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }],
      });
 
-      //
+      // Save original environment variable value
      const originalDebugValue = process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION;
 
-      //
+      // Mock environment variable
      process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION = '1';
      vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve());
 
-      //
-      //
-      //
+      // Execute test
+      // Run your test function, ensuring it calls debugStream when conditions are met
+      // Hypothetical test function call, you may need to adjust based on actual situation
      await instance.chat({
        messages: [{ content: 'Hello', role: 'user' }],
        model: 'mistralai/mistral-7b-instruct:free',
        temperature: 0,
      });
 
-      //
+      // Verify debugStream is called
      expect(debugStreamModule.debugStream).toHaveBeenCalled();
 
-      //
+      // Restore original environment variable value
      process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION = originalDebugValue;
    });
  });
package/packages/model-runtime/src/utils/openaiCompatibleFactory/index.ts
@@ -1,12 +1,11 @@
 import { getModelPropertyWithFallback } from '@lobechat/utils';
 import dayjs from 'dayjs';
 import utc from 'dayjs/plugin/utc';
-import createDebug from 'debug';
 import OpenAI, { ClientOptions } from 'openai';
 import { Stream } from 'openai/streaming';
 
 import { LOBE_DEFAULT_MODEL_LIST } from '@/config/aiModels';
-import {
+import type { AiModelType } from '@/types/aiModel';
 import type { ChatModelCard } from '@/types/llm';
 
 import { LobeRuntimeAI } from '../../BaseAI';
@@ -30,13 +29,11 @@ import { AgentRuntimeError } from '../createError';
 import { debugResponse, debugStream } from '../debugStream';
 import { desensitizeUrl } from '../desensitizeUrl';
 import { handleOpenAIError } from '../handleOpenAIError';
-import {
-  convertImageUrlToFile,
-  convertOpenAIMessages,
-  convertOpenAIResponseInputs,
-} from '../openaiHelpers';
+import { convertOpenAIMessages, convertOpenAIResponseInputs } from '../openaiHelpers';
+import { postProcessModelList } from '../postProcessModelList';
 import { StreamingResponse } from '../response';
 import { OpenAIResponsesStream, OpenAIStream, OpenAIStreamOptions } from '../streams';
+import { createOpenAICompatibleImage } from './createImage';
 
 // the model contains the following keywords is not a chat model, so we should filter them out
 export const CHAT_MODELS_BLOCK_LIST = [
@@ -334,107 +331,8 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
      });
    }
 
-    //
-
-    const log = createDebug(`lobe-image:model-runtime`);
-
-    log('Creating image with model: %s and params: %O', model, params);
-
-    // Map parameter names, mapping imageUrls to image
-    const paramsMap = new Map<RuntimeImageGenParamsValue, string>([
-      ['imageUrls', 'image'],
-      ['imageUrl', 'image'],
-    ]);
-    const userInput: Record<string, any> = Object.fromEntries(
-      Object.entries(params).map(([key, value]) => [
-        paramsMap.get(key as RuntimeImageGenParamsValue) ?? key,
-        value,
-      ]),
-    );
-
-    // https://platform.openai.com/docs/api-reference/images/createEdit
-    const isImageEdit = Array.isArray(userInput.image) && userInput.image.length > 0;
-    // If there are imageUrls parameters, convert them to File objects
-    if (isImageEdit) {
-      log('Converting imageUrls to File objects: %O', userInput.image);
-      try {
-        // Convert all image URLs to File objects
-        const imageFiles = await Promise.all(
-          userInput.image.map((url: string) => convertImageUrlToFile(url)),
-        );
-
-        log('Successfully converted %d images to File objects', imageFiles.length);
-
-        // According to official docs, if there are multiple images, pass an array; if only one, pass a single File
-        userInput.image = imageFiles.length === 1 ? imageFiles[0] : imageFiles;
-      } catch (error) {
-        log('Error converting imageUrls to File objects: %O', error);
-        throw new Error(`Failed to convert image URLs to File objects: ${error}`);
-      }
-    } else {
-      delete userInput.image;
-    }
-
-    if (userInput.size === 'auto') {
-      delete userInput.size;
-    }
-
-    const defaultInput = {
-      n: 1,
-      ...(model.includes('dall-e') ? { response_format: 'b64_json' } : {}),
-      ...(isImageEdit ? { input_fidelity: 'high' } : {}),
-    };
-
-    const options = {
-      model,
-      ...defaultInput,
-      ...userInput,
-    };
-
-    log('options: %O', options);
-
-    // Determine if it's an image editing operation
-    const img = isImageEdit
-      ? await this.client.images.edit(options as any)
-      : await this.client.images.generate(options as any);
-
-    // Check the integrity of the response data
-    if (!img || !img.data || !Array.isArray(img.data) || img.data.length === 0) {
-      log('Invalid image response: missing data array');
-      throw new Error('Invalid image response: missing or empty data array');
-    }
-
-    const imageData = img.data[0];
-    if (!imageData) {
-      log('Invalid image response: first data item is null/undefined');
-      throw new Error('Invalid image response: first data item is null or undefined');
-    }
-
-    let imageUrl: string;
-
-    // Handle base64 format response
-    if (imageData.b64_json) {
-      // Determine the image's MIME type, defaulting to PNG
-      const mimeType = 'image/png'; // OpenAI image generation returns PNG by default
-
-      // Convert the base64 string to a complete data URL
-      imageUrl = `data:${mimeType};base64,${imageData.b64_json}`;
-      log('Successfully converted base64 to data URL, length: %d', imageUrl.length);
-    }
-    // Handle URL format response
-    else if (imageData.url) {
-      imageUrl = imageData.url;
-      log('Using direct image URL: %s', imageUrl);
-    }
-    // If neither format exists, throw an error
-    else {
-      log('Invalid image response: missing both b64_json and url fields');
-      throw new Error('Invalid image response: missing both b64_json and url fields');
-    }
-
-    return {
-      imageUrl,
-    };
+    // Use the new createOpenAICompatibleImage function
+    return createOpenAICompatibleImage(this.client, payload, provider);
   }
 
   async models() {
@@ -489,14 +387,9 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
        .filter(Boolean) as ChatModelCard[];
    }
 
-    return (await Promise.all(
-      resultModels.map(async (model) => {
-        return {
-          ...model,
-          type: model.type || (await getModelPropertyWithFallback(model.id, 'type')),
-        };
-      }),
-    )) as ChatModelCard[];
+    return await postProcessModelList(resultModels, (modelId) =>
+      getModelPropertyWithFallback<AiModelType>(modelId, 'type'),
+    );
   }
 
   async embeddings(
package/packages/model-runtime/src/utils/postProcessModelList.ts (new file)
@@ -0,0 +1,55 @@
+import { CHAT_MODEL_IMAGE_GENERATION_PARAMS } from '@/const/image';
+import type { AiModelType } from '@/types/aiModel';
+import type { ChatModelCard } from '@/types/llm';
+
+// Whitelist for automatic image model generation
+export const IMAGE_GENERATION_MODEL_WHITELIST = [
+  'gemini-2.5-flash-image-preview',
+  // More models can be added in the future
+] as const;
+
+/**
+ * Process model list: ensure the type field exists and generate image generation models for whitelisted models
+ * @param models Original model list
+ * @param getModelTypeProperty Optional callback function to get the model type property
+ * @returns Processed model list (including image generation models)
+ */
+export async function postProcessModelList(
+  models: ChatModelCard[],
+  getModelTypeProperty?: (modelId: string) => Promise<AiModelType>,
+): Promise<ChatModelCard[]> {
+  // 1. Ensure all models have a type field
+  const finalModels = await Promise.all(
+    models.map(async (model) => {
+      let modelType: AiModelType | undefined = model.type;
+
+      if (!modelType && getModelTypeProperty) {
+        modelType = await getModelTypeProperty(model.id);
+      }
+
+      return {
+        ...model,
+        type: modelType || 'chat',
+      };
+    }),
+  );
+
+  // 2. Check whitelisted models and generate corresponding image versions
+  const imageModels: ChatModelCard[] = [];
+
+  for (const whitelistPattern of IMAGE_GENERATION_MODEL_WHITELIST) {
+    const matchingModels = finalModels.filter((model) => model.id.endsWith(whitelistPattern));
+
+    for (const model of matchingModels) {
+      imageModels.push({
+        ...model, // Reuse all configurations from the original model
+        id: `${model.id}:image`,
+        // Set image generation parameters
+        parameters: CHAT_MODEL_IMAGE_GENERATION_PARAMS,
+        type: 'image', // Override to image type
+      });
+    }
+  }
+
+  return [...finalModels, ...imageModels];
+}
package/packages/model-runtime/src/utils/streams/google-ai.test.ts
@@ -181,7 +181,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1\n',
      'event: usage\n',
-      `data: {"inputImageTokens":258,"inputTextTokens":8,"outputTextTokens":0,"totalInputTokens":266,"totalOutputTokens":0,"totalTokens":266}\n\n`,
+      `data: {"inputImageTokens":258,"inputTextTokens":8,"outputImageTokens":0,"outputTextTokens":0,"totalInputTokens":266,"totalOutputTokens":0,"totalTokens":266}\n\n`,
    ]);
  });
 
@@ -227,7 +227,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1\n',
      'event: usage\n',
-      `data: {"inputCachedTokens":14286,"inputTextTokens":15725,"outputTextTokens":1053,"totalInputTokens":15725,"totalOutputTokens":1053,"totalTokens":16778}\n\n`,
+      `data: {"inputCachedTokens":14286,"inputTextTokens":15725,"outputImageTokens":0,"outputTextTokens":1053,"totalInputTokens":15725,"totalOutputTokens":1053,"totalTokens":16778}\n\n`,
    ]);
  });
 
@@ -316,7 +316,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1',
      'event: usage',
-      `data: {"inputTextTokens":19,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
+      `data: {"inputTextTokens":19,"outputImageTokens":0,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
    ].map((i) => i + '\n'),
  );
 });
@@ -409,7 +409,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1',
      'event: usage',
-      `data: {"inputTextTokens":19,"outputReasoningTokens":100,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":111,"totalTokens":131}\n`,
+      `data: {"inputTextTokens":19,"outputImageTokens":0,"outputReasoningTokens":100,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":111,"totalTokens":131}\n`,
    ].map((i) => i + '\n'),
  );
 });
@@ -542,7 +542,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1',
      'event: usage',
-      `data: {"inputTextTokens":38,"outputReasoningTokens":304,"outputTextTokens":19,"totalInputTokens":38,"totalOutputTokens":323,"totalTokens":361}\n`,
+      `data: {"inputTextTokens":38,"outputImageTokens":0,"outputReasoningTokens":304,"outputTextTokens":19,"totalInputTokens":38,"totalOutputTokens":323,"totalTokens":361}\n`,
    ].map((i) => i + '\n'),
  );
 });
@@ -662,7 +662,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1',
      'event: usage',
-      `data: {"inputTextTokens":19,"outputReasoningTokens":100,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":111,"totalTokens":131}\n`,
+      `data: {"inputTextTokens":19,"outputImageTokens":0,"outputReasoningTokens":100,"outputTextTokens":11,"totalInputTokens":19,"totalOutputTokens":111,"totalTokens":131}\n`,
    ].map((i) => i + '\n'),
  );
 });
@@ -811,7 +811,7 @@ describe('GoogleGenerativeAIStream', () => {
      // usage
      'id: chat_1',
      'event: usage',
-      `data: {"inputTextTokens":9,"outputTextTokens":122,"totalInputTokens":9,"totalOutputTokens":122,"totalTokens":131}\n`,
+      `data: {"inputTextTokens":9,"outputImageTokens":0,"outputTextTokens":122,"totalInputTokens":9,"totalOutputTokens":122,"totalTokens":131}\n`,
    ].map((i) => i + '\n'),
  );
 });
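Taken together, these fixtures pin down the usage-event payload after this change; a sketch of the shape inferred from them (field optionality is an assumption, not part of the diff):

// Inferred from the test fixtures above; fields are omitted when unknown.
interface UsageEventPayload {
  inputCachedTokens?: number;
  inputImageTokens?: number;
  inputTextTokens?: number;
  outputImageTokens?: number; // new in this release; 0 for text-only responses
  outputReasoningTokens?: number;
  outputTextTokens?: number;
  totalInputTokens?: number;
  totalOutputTokens?: number;
  totalTokens?: number;
}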
package/packages/model-runtime/src/utils/streams/google-ai.ts
@@ -57,8 +57,20 @@ const transformGoogleGenerativeAIStream = (
   if (candidate?.finishReason && usage) {
     // totalTokenCount = promptTokenCount + candidatesTokenCount + thoughtsTokenCount
     const reasoningTokens = usage.thoughtsTokenCount;
-
-    const
+
+    const candidatesDetails = usage.candidatesTokensDetails;
+    const candidatesTotal =
+      usage.candidatesTokenCount ??
+      candidatesDetails?.reduce((s: number, i: any) => s + (i?.tokenCount ?? 0), 0) ??
+      0;
+
+    const outputImageTokens =
+      candidatesDetails?.find((i: any) => i.modality === 'IMAGE')?.tokenCount ?? 0;
+    const outputTextTokens =
+      candidatesDetails?.find((i: any) => i.modality === 'TEXT')?.tokenCount ??
+      Math.max(0, candidatesTotal - outputImageTokens);
+
+    const totalOutputTokens = candidatesTotal + (reasoningTokens ?? 0);
 
     usageChunks.push(
       { data: candidate.finishReason, id: context?.id, type: 'stop' },
@@ -69,6 +81,7 @@ const transformGoogleGenerativeAIStream = (
         ?.tokenCount,
       inputTextTokens: usage.promptTokensDetails?.find((i) => i.modality === 'TEXT')
         ?.tokenCount,
+      outputImageTokens,
       outputReasoningTokens: reasoningTokens,
       outputTextTokens,
       totalInputTokens: usage.promptTokenCount,