@lobehub/chat 1.81.6 → 1.81.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +50 -0
  2. package/changelog/v1.json +18 -0
  3. package/locales/ar/chat.json +10 -0
  4. package/locales/bg-BG/chat.json +10 -0
  5. package/locales/de-DE/chat.json +10 -0
  6. package/locales/en-US/chat.json +10 -0
  7. package/locales/es-ES/chat.json +10 -0
  8. package/locales/fa-IR/chat.json +10 -0
  9. package/locales/fr-FR/chat.json +10 -0
  10. package/locales/it-IT/chat.json +10 -0
  11. package/locales/ja-JP/chat.json +10 -0
  12. package/locales/ko-KR/chat.json +10 -0
  13. package/locales/nl-NL/chat.json +10 -0
  14. package/locales/pl-PL/chat.json +10 -0
  15. package/locales/pt-BR/chat.json +10 -0
  16. package/locales/ru-RU/chat.json +10 -0
  17. package/locales/tr-TR/chat.json +10 -0
  18. package/locales/vi-VN/chat.json +10 -0
  19. package/locales/zh-CN/chat.json +10 -0
  20. package/locales/zh-TW/chat.json +10 -0
  21. package/package.json +1 -1
  22. package/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx +31 -4
  23. package/src/features/Conversation/Extras/Usage/index.tsx +1 -1
  24. package/src/libs/agent-runtime/anthropic/index.ts +7 -3
  25. package/src/libs/agent-runtime/perplexity/index.test.ts +4 -1
  26. package/src/libs/agent-runtime/togetherai/index.ts +19 -23
  27. package/src/libs/agent-runtime/togetherai/type.ts +2 -2
  28. package/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +17 -8
  29. package/src/libs/agent-runtime/utils/streams/anthropic.test.ts +11 -5
  30. package/src/libs/agent-runtime/utils/streams/anthropic.ts +11 -2
  31. package/src/libs/agent-runtime/utils/streams/openai.ts +5 -2
  32. package/src/libs/agent-runtime/utils/streams/protocol.test.ts +67 -1
  33. package/src/libs/agent-runtime/utils/streams/protocol.ts +46 -1
  34. package/src/locales/default/chat.ts +11 -0
  35. package/src/store/chat/slices/aiChat/actions/generateAIChat.ts +2 -2
  36. package/src/types/message/base.ts +8 -0
  37. package/src/utils/fetch/fetchSSE.ts +17 -1
package/CHANGELOG.md CHANGED
@@ -2,6 +2,56 @@
 
  # Changelog
 
+ ### [Version 1.81.8](https://github.com/lobehub/lobe-chat/compare/v1.81.7...v1.81.8)
+
+ <sup>Released on **2025-04-21**</sup>
+
+ #### 💄 Styles
+
+ - **misc**: Show token generate performance.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### Styles
+
+ - **misc**: Show token generate performance, closes [#6959](https://github.com/lobehub/lobe-chat/issues/6959) ([33c3fe7](https://github.com/lobehub/lobe-chat/commit/33c3fe7))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
+ ### [Version 1.81.7](https://github.com/lobehub/lobe-chat/compare/v1.81.6...v1.81.7)
+
+ <sup>Released on **2025-04-21**</sup>
+
+ #### 🐛 Bug Fixes
+
+ - **misc**: Together.ai fetch model list.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's fixed
+
+ - **misc**: Together.ai fetch model list, closes [#7498](https://github.com/lobehub/lobe-chat/issues/7498) ([bd797b8](https://github.com/lobehub/lobe-chat/commit/bd797b8))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ### [Version 1.81.6](https://github.com/lobehub/lobe-chat/compare/v1.81.5...v1.81.6)
 
  <sup>Released on **2025-04-21**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,22 @@
  [
+   {
+     "children": {
+       "improvements": [
+         "Show token generate performance."
+       ]
+     },
+     "date": "2025-04-21",
+     "version": "1.81.8"
+   },
+   {
+     "children": {
+       "fixes": [
+         "Together.ai fetch model list."
+       ]
+     },
+     "date": "2025-04-21",
+     "version": "1.81.7"
+   },
    {
      "children": {
        "improvements": [
package/locales/ar/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "مخرجات نصية",
    "outputTitle": "تفاصيل المخرجات",
    "reasoning": "تفكير عميق",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "عدد الرموز في الثانية، TPS. يشير إلى متوسط سرعة توليد المحتوى بواسطة الذكاء الاصطناعي (رمز/ثانية)، ويبدأ الحساب عند استلام أول رمز."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "الوقت حتى أول رمز، TTFT. يشير إلى الفارق الزمني من لحظة إرسال الرسالة حتى استلام أول رمز في العميل."
+     }
+   },
    "title": "تفاصيل التوليد",
    "total": "الإجمالي المستهلك"
  }
package/locales/bg-BG/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Текстов изход",
    "outputTitle": "Детайли за изхода",
    "reasoning": "Дълбочинно разсъждение",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Токени на секунда, TPS. Отнася се до средната скорост на генериране на съдържание от AI (Токен/секунда), започвайки да се изчислява след получаване на първия токен."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Време до първия токен, TTFT. Отнася се до времевия интервал от изпращането на съобщението до получаването на първия токен от клиента."
+     }
+   },
    "title": "Детайли за генериране",
    "total": "Общо разходи"
  }
package/locales/de-DE/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Text-Ausgabe",
    "outputTitle": "Ausgabedetails",
    "reasoning": "Tiefes Denken",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens pro Sekunde, TPS. Bezieht sich auf die durchschnittliche Geschwindigkeit, mit der AI-Inhalte generiert werden (Token/Sekunde), und beginnt mit der Berechnung, nachdem das erste Token empfangen wurde."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Zeit bis zum ersten Token, TTFT. Bezieht sich auf das Zeitintervall von dem Moment, in dem Sie eine Nachricht senden, bis der Client das erste Token erhält."
+     }
+   },
    "title": "Generierungsdetails",
    "total": "Gesamter Verbrauch"
  }
package/locales/en-US/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Text Output",
    "outputTitle": "Output Details",
    "reasoning": "Deep Thinking",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Per Second (TPS). This indicates the average speed of AI-generated content (Tokens/second), calculated from the moment the first Token is received."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token (TTFT). This refers to the time interval from when you send a message to when the client receives the first Token."
+     }
+   },
    "title": "Generation Details",
    "total": "Total Consumption"
  }
package/locales/es-ES/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Salida de texto",
    "outputTitle": "Detalles de salida",
    "reasoning": "Razonamiento profundo",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Por Segundo, TPS. Se refiere a la velocidad promedio de generación de contenido por la IA (Token/segundo), comenzando a contar desde que se recibe el primer Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Tiempo Hasta el Primer Token, TTFT. Se refiere al intervalo de tiempo desde que envía un mensaje hasta que el cliente recibe el primer Token."
+     }
+   },
    "title": "Detalles de generación",
    "total": "Total consumido"
  }
package/locales/fa-IR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "خروجی متنی",
    "outputTitle": "جزئیات خروجی",
    "reasoning": "تفکر عمیق",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "تعداد توکن در ثانیه، TPS. به میانگین سرعت تولید محتوای AI اشاره دارد (توکن/ثانیه) و از زمان دریافت اولین توکن شروع به محاسبه می‌شود."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "زمان تا اولین توکن، TTFT. به فاصله زمانی بین ارسال پیام شما و دریافت اولین توکن توسط کلاینت اشاره دارد."
+     }
+   },
    "title": "جزئیات تولید",
    "total": "مجموع مصرف"
  }
package/locales/fr-FR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Sortie texte",
    "outputTitle": "Détails de la sortie",
    "reasoning": "Raisonnement approfondi",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Par Seconde, TPS. Indique la vitesse moyenne de génération de contenu par l'IA (Token/seconde), calculée à partir de la réception du premier Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Temps Jusqu'au Premier Token, TTFT. Indique l'intervalle de temps entre l'envoi de votre message et la réception du premier Token par le client."
+     }
+   },
    "title": "Détails de génération",
    "total": "Total consommé"
  }
package/locales/it-IT/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Output testo",
    "outputTitle": "Dettagli output",
    "reasoning": "Ragionamento profondo",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Token Per Second, TPS. Indica la velocità media di generazione dei contenuti da parte dell'AI (Token/secondo), calcolata a partire dalla ricezione del primo Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token, TTFT. Indica l'intervallo di tempo che intercorre tra l'invio del messaggio e la ricezione del primo Token da parte del client."
+     }
+   },
    "title": "Dettagli generati",
    "total": "Totale consumato"
  }
package/locales/ja-JP/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "テキスト出力",
    "outputTitle": "出力の詳細",
    "reasoning": "深い思考",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "トークン毎秒(TPS)。AIが生成するコンテンツの平均速度(トークン/秒)を示し、最初のトークンを受信した時点から計算が始まります。"
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "最初のトークンまでの時間(TTFT)。メッセージを送信してからクライアントが最初のトークンを受信するまでの時間間隔を示します。"
+     }
+   },
    "title": "生成の詳細",
    "total": "合計消費"
  }
package/locales/ko-KR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "텍스트 출력",
    "outputTitle": "출력 세부사항",
    "reasoning": "심층 사고",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "초당 토큰 수(TPS). AI가 생성한 콘텐츠의 평균 속도(토큰/초)를 나타내며, 첫 번째 토큰을 수신한 후부터 계산됩니다."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "첫 번째 토큰까지의 시간(Time To First Token, TTFT). 메시지를 전송한 시점부터 클라이언트가 첫 번째 토큰을 수신할 때까지의 시간 간격을 나타냅니다."
+     }
+   },
    "title": "생성 세부사항",
    "total": "총 소모"
  }
package/locales/nl-NL/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Tekstuitvoer",
    "outputTitle": "Uitvoerdetails",
    "reasoning": "Diep nadenken",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Per Second, TPS. Dit verwijst naar de gemiddelde snelheid van AI-gegenereerde inhoud (Token/seconde), die begint te tellen vanaf het moment dat het eerste Token is ontvangen."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token, TTFT. Dit verwijst naar de tijdsduur van het moment dat u een bericht verzendt tot het moment dat de client het eerste Token ontvangt."
+     }
+   },
    "title": "Genereren van details",
    "total": "Totaal verbruik"
  }
package/locales/pl-PL/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Wyjście tekstowe",
    "outputTitle": "Szczegóły wyjścia",
    "reasoning": "Głębokie myślenie",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokeny na sekundę, TPS. Oznacza średnią prędkość generowania treści przez AI (Tokeny/sekundę), obliczaną od momentu otrzymania pierwszego Tokena."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Czas do pierwszego tokena, TTFT. Oznacza czas od momentu wysłania wiadomości do momentu, w którym klient otrzymuje pierwszy Token."
+     }
+   },
    "title": "Szczegóły generacji",
    "total": "Całkowite zużycie"
  }
package/locales/pt-BR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Saída de texto",
    "outputTitle": "Detalhes da saída",
    "reasoning": "Raciocínio profundo",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Por Segundo, TPS. Refere-se à velocidade média de geração de conteúdo pela IA (Token/segundo), começando a contagem após o recebimento do primeiro Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Tempo Para o Primeiro Token, TTFT. Refere-se ao intervalo de tempo desde que você envia a mensagem até que o cliente receba o primeiro Token."
+     }
+   },
    "title": "Detalhes da geração",
    "total": "Total consumido"
  }
package/locales/ru-RU/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Текстовый выход",
    "outputTitle": "Детали выхода",
    "reasoning": "Глубокое мышление",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Токены в секунду (TPS). Средняя скорость генерации контента ИИ (Токенов/сек), начинается с момента получения первого токена."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Время до первого токена (TTFT). Время, прошедшее с момента отправки вами сообщения до получения первого токена клиентом."
+     }
+   },
    "title": "Детали генерации",
    "total": "Общее потребление"
  }
package/locales/tr-TR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Metin çıkışı",
    "outputTitle": "Çıkış detayları",
    "reasoning": "Derin düşünme",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Token Başına Saniye, TPS. AI tarafından üretilen içeriğin ortalama hızını (Token/saniye) belirtir, ilk Token alındıktan sonra hesaplanmaya başlanır."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "İlk Token'a Kadar Geçen Süre, TTFT. Mesajınızı göndermenizden, istemcinin ilk Token'ı almasına kadar geçen zaman aralığını belirtir."
+     }
+   },
    "title": "Üretim detayları",
    "total": "Toplam tüketim"
  }
package/locales/vi-VN/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Văn bản xuất",
    "outputTitle": "Chi tiết xuất",
    "reasoning": "Suy nghĩ sâu sắc",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Mỗi Giây, TPS. Chỉ tốc độ trung bình của nội dung do AI tạo ra (Token/giây), bắt đầu tính từ khi nhận được Token đầu tiên."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Thời Gian Đến Token Đầu Tiên, TTFT. Chỉ khoảng thời gian từ khi bạn gửi tin nhắn đến khi khách hàng nhận được Token đầu tiên."
+     }
+   },
    "title": "Chi tiết tạo ra",
    "total": "Tổng tiêu thụ"
  }
package/locales/zh-CN/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "文本输出",
    "outputTitle": "输出明细",
    "reasoning": "深度思考",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Per Second,TPS。指AI生成内容的平均速度(Token/秒),在接收到首个 Token 后开始计算。"
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token,TTFT。指从您发送消息到客户端接收到首个 Token 的时间间隔。"
+     }
+   },
    "title": "生成明细",
    "total": "总计消耗"
  }
package/locales/zh-TW/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "文本輸出",
    "outputTitle": "輸出明細",
    "reasoning": "深度思考",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "每秒令牌數,TPS。指AI生成內容的平均速度(Token/秒),在接收到首個令牌後開始計算。"
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "首次令牌時間,TTFT。指從您發送消息到客戶端接收到首個令牌的時間間隔。"
+     }
+   },
    "title": "生成明細",
    "total": "總計消耗"
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@lobehub/chat",
-   "version": "1.81.6",
+   "version": "1.81.8",
    "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
    "keywords": [
      "framework",
package/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx CHANGED
@@ -6,10 +6,11 @@ import { memo } from 'react';
  import { useTranslation } from 'react-i18next';
  import { Center, Flexbox } from 'react-layout-kit';
 
+ import InfoTooltip from '@/components/InfoTooltip';
  import { aiModelSelectors, useAiInfraStore } from '@/store/aiInfra';
  import { useGlobalStore } from '@/store/global';
  import { systemStatusSelectors } from '@/store/global/selectors';
- import { ModelTokensUsage } from '@/types/message';
+ import { MessageMetadata } from '@/types/message';
  import { formatNumber } from '@/utils/format';
 
  import ModelCard from './ModelCard';
@@ -17,19 +18,19 @@ import TokenProgress, { TokenProgressItem } from './TokenProgress';
  import { getDetailsToken } from './tokens';
 
  interface TokenDetailProps {
+   meta: MessageMetadata;
    model: string;
    provider: string;
-   usage: ModelTokensUsage;
  }
 
- const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
+ const TokenDetail = memo<TokenDetailProps>(({ meta, model, provider }) => {
    const { t } = useTranslation('chat');
    const theme = useTheme();
 
    const modelCard = useAiInfraStore(aiModelSelectors.getModelCard(model, provider));
    const isShowCredit = useGlobalStore(systemStatusSelectors.isShowCredit) && !!modelCard?.pricing;
 
-   const detailTokens = getDetailsToken(usage, modelCard);
+   const detailTokens = getDetailsToken(meta, modelCard);
    const inputDetails = [
      !!detailTokens.inputAudio && {
        color: theme.cyan9,
@@ -113,6 +114,10 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
      detailTokens.totalTokens!.credit / detailTokens.totalTokens!.token,
      2,
    );
+
+   const tps = meta?.tps ? formatNumber(meta.tps, 2) : undefined;
+   const ttft = meta?.ttft ? formatNumber(meta.ttft / 1000, 2) : undefined;
+
    return (
      <Popover
        arrow={false}
@@ -170,6 +175,28 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
            <div style={{ fontWeight: 500 }}>{averagePricing}</div>
          </Flexbox>
        )}
+       {tps && (
+         <Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
+           <Flexbox gap={8} horizontal>
+             <div style={{ color: theme.colorTextSecondary }}>
+               {t('messages.tokenDetails.speed.tps.title')}
+             </div>
+             <InfoTooltip title={t('messages.tokenDetails.speed.tps.tooltip')} />
+           </Flexbox>
+           <div style={{ fontWeight: 500 }}>{tps}</div>
+         </Flexbox>
+       )}
+       {ttft && (
+         <Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
+           <Flexbox gap={8} horizontal>
+             <div style={{ color: theme.colorTextSecondary }}>
+               {t('messages.tokenDetails.speed.ttft.title')}
+             </div>
+             <InfoTooltip title={t('messages.tokenDetails.speed.ttft.tooltip')} />
+           </Flexbox>
+           <div style={{ fontWeight: 500 }}>{ttft}s</div>
+         </Flexbox>
+       )}
      </Flexbox>
    </Flexbox>
  </Flexbox>
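For context on the UI change above: the component renders TPS as a bare number and converts TTFT from milliseconds to seconds before display. A minimal TypeScript sketch of that formatting logic, using plain `toFixed` in place of the repo's `formatNumber` helper, with `SpeedMeta` as a stand-in for `MessageMetadata`:

```ts
// Sketch only: mirrors how the new speed rows derive their display strings.
interface SpeedMeta {
  tps?: number; // tokens per second
  ttft?: number; // time to first token, in milliseconds
}

const toSpeedDisplay = (meta: SpeedMeta) => ({
  tps: meta.tps ? meta.tps.toFixed(2) : undefined,
  // ttft arrives in ms and is rendered in seconds, e.g. "0.42s"
  ttft: meta.ttft ? `${(meta.ttft / 1000).toFixed(2)}s` : undefined,
});

// toSpeedDisplay({ tps: 53.127, ttft: 420 }) → { tps: '53.13', ttft: '0.42s' }
```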
package/src/features/Conversation/Extras/Usage/index.tsx CHANGED
@@ -31,7 +31,7 @@ const Usage = memo<UsageProps>(({ model, metadata, provider }) => {
      </Center>
 
      {!!metadata.totalTokens && (
-       <TokenDetail model={model as string} provider={provider} usage={metadata} />
+       <TokenDetail meta={metadata} model={model as string} provider={provider} />
      )}
    </Flexbox>
  );
package/src/libs/agent-runtime/anthropic/index.ts CHANGED
@@ -56,6 +56,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
    async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
      try {
        const anthropicPayload = await this.buildAnthropicPayload(payload);
+       const inputStartAt = Date.now();
 
        if (this.isDebug()) {
          console.log('[requestPayload]');
@@ -79,9 +80,12 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
          debugStream(debug.toReadableStream()).catch(console.error);
        }
 
-       return StreamingResponse(AnthropicStream(prod, options?.callback), {
-         headers: options?.headers,
-       });
+       return StreamingResponse(
+         AnthropicStream(prod, { callbacks: options?.callback, inputStartAt }),
+         {
+           headers: options?.headers,
+         },
+       );
      } catch (error) {
        throw this.handleError(error);
      }
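The hunk above changes `AnthropicStream`'s second parameter from a bare callbacks object to an options object, so the provider can thread the request start timestamp through to the speed calculator. A sketch of the call-site migration (stubbed placeholders, not runtime code):

```ts
// Call-shape migration sketch; `stream` and `callbacks` are placeholders.
declare const stream: ReadableStream;
declare const callbacks: Record<string, unknown> | undefined;
declare function AnthropicStream(
  s: ReadableStream,
  o?: { callbacks?: unknown; inputStartAt?: number },
): ReadableStream;

// Before (v1.81.6): AnthropicStream(stream, callbacks)
// After (v1.81.8):
const inputStartAt = Date.now(); // captured before the request is issued
const piped = AnthropicStream(stream, { callbacks, inputStartAt });
```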
package/src/libs/agent-runtime/perplexity/index.test.ts CHANGED
@@ -225,7 +225,10 @@ describe('LobePerplexityAI', () => {
        stream.push(decoder.decode(value));
      }
 
-     expect(stream).toEqual(
+     // Slice out speed chunk
+     const noSpeedStream = stream.slice(0, -3);
+
+     expect(noSpeedStream).toEqual(
        [
          'id: 506d64fb-e7f2-4d94-b80f-158369e9446d',
          'event: text',
package/src/libs/agent-runtime/togetherai/index.ts CHANGED
@@ -1,9 +1,9 @@
+ import type { ChatModelCard } from '@/types/llm';
+
  import { ModelProvider } from '../types';
  import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
  import { TogetherAIModel } from './type';
 
- import type { ChatModelCard } from '@/types/llm';
-
  export const LobeTogetherAI = LobeOpenAICompatibleFactory({
    baseURL: 'https://api.together.xyz/v1',
    constructorOptions: {
@@ -18,24 +18,20 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
    models: async ({ client }) => {
      const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');
 
-     const visionKeywords = [
-       'qvq',
-       'vision',
-     ];
+     const visionKeywords = ['qvq', 'vision'];
 
-     const reasoningKeywords = [
-       'deepseek-r1',
-       'qwq',
-     ];
+     const reasoningKeywords = ['deepseek-r1', 'qwq'];
 
      client.baseURL = 'https://api.together.xyz/api';
 
-     const modelsPage = await client.models.list() as any;
+     const modelsPage = (await client.models.list()) as any;
      const modelList: TogetherAIModel[] = modelsPage.body;
 
      return modelList
        .map((model) => {
-         const knownModel = LOBE_DEFAULT_MODEL_LIST.find((m) => model.name.toLowerCase() === m.id.toLowerCase());
+         const knownModel = LOBE_DEFAULT_MODEL_LIST.find(
+           (m) => model.id.toLowerCase() === m.id.toLowerCase(),
+         );
 
          return {
            contextWindowTokens: knownModel?.contextWindowTokens ?? undefined,
@@ -43,21 +39,21 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
            displayName: model.display_name,
            enabled: knownModel?.enabled || false,
            functionCall:
-             model.description?.toLowerCase().includes('function calling')
-             || knownModel?.abilities?.functionCall
-             || false,
-           id: model.name,
+             model.description?.toLowerCase().includes('function calling') ||
+             knownModel?.abilities?.functionCall ||
+             false,
+           id: model.id,
            maxOutput: model.context_length,
            reasoning:
-             reasoningKeywords.some(keyword => model.name.toLowerCase().includes(keyword))
-             || knownModel?.abilities?.functionCall
-             || false,
+             reasoningKeywords.some((keyword) => model.id.toLowerCase().includes(keyword)) ||
+             knownModel?.abilities?.functionCall ||
+             false,
            tokens: model.context_length,
            vision:
-             model.description?.toLowerCase().includes('vision')
-             || visionKeywords.some(keyword => model.name?.toLowerCase().includes(keyword))
-             || knownModel?.abilities?.functionCall
-             || false,
+             model.description?.toLowerCase().includes('vision') ||
+             visionKeywords.some((keyword) => model.id?.toLowerCase().includes(keyword)) ||
+             knownModel?.abilities?.functionCall ||
+             false,
          };
        })
        .filter(Boolean) as ChatModelCard[];
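The v1.81.7 fix above tracks a change in Together.ai's `/api/models` response: models are identified by an `id` field, and the previously used `name` field no longer exists (see the `TogetherAIModel` type change below), which is what broke model-list fetching in #7498. A hypothetical entry illustrating the shape the mapper now consumes (field names follow the updated type; the values are invented):

```ts
// Hypothetical /api/models entry; only fields the mapper reads are shown.
const exampleModel = {
  context_length: 131_072,
  description: 'A reasoning model with function calling support',
  display_name: 'DeepSeek R1',
  id: 'deepseek-ai/DeepSeek-R1', // formerly exposed as `name`, now `id`
};
```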
package/src/libs/agent-runtime/togetherai/type.ts CHANGED
@@ -50,7 +50,8 @@ interface Depth {
  }
 
  export interface TogetherAIModel {
-   _id: string;
+   id: string;
+   // eslint-disable-next-line typescript-sort-keys/interface
    access: string;
    config: Config;
    context_length: number;
@@ -67,7 +68,6 @@ export interface TogetherAIModel {
    license: string;
    link: string;
    modelInstanceConfig: ModelInstanceConfig;
-   name: string;
    num_parameters: number;
    pricing: Pricing;
    show_in_playground: boolean;
package/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts CHANGED
@@ -201,6 +201,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
 
    async chat({ responseMode, ...payload }: ChatStreamPayload, options?: ChatCompetitionOptions) {
      try {
+       const inputStartAt = Date.now();
        const postPayload = chatCompletion?.handlePayload
          ? chatCompletion.handlePayload(payload, this._options)
          : ({
@@ -253,10 +254,14 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
          debugStream(useForDebugStream).catch(console.error);
        }
 
-       const streamHandler = chatCompletion?.handleStream || OpenAIStream;
-       return StreamingResponse(streamHandler(prod, streamOptions), {
-         headers: options?.headers,
-       });
+       return StreamingResponse(
+         chatCompletion?.handleStream
+           ? chatCompletion.handleStream(prod, streamOptions.callbacks)
+           : OpenAIStream(prod, { ...streamOptions, inputStartAt }),
+         {
+           headers: options?.headers,
+         },
+       );
      }
 
      if (debug?.chatCompletion?.()) {
@@ -269,10 +274,14 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
          chatCompletion?.handleTransformResponseToStream || transformResponseToStream;
        const stream = transformHandler(response as unknown as OpenAI.ChatCompletion);
 
-       const streamHandler = chatCompletion?.handleStream || OpenAIStream;
-       return StreamingResponse(streamHandler(stream, streamOptions), {
-         headers: options?.headers,
-       });
+       return StreamingResponse(
+         chatCompletion?.handleStream
+           ? chatCompletion.handleStream(stream, streamOptions.callbacks)
+           : OpenAIStream(stream, { ...streamOptions, inputStartAt }),
+         {
+           headers: options?.headers,
+         },
+       );
      } catch (error) {
        throw this.handleError(error);
      }
package/src/libs/agent-runtime/utils/streams/anthropic.test.ts CHANGED
@@ -61,9 +61,11 @@ describe('AnthropicStream', () => {
    const onCompletionMock = vi.fn();
 
    const protocolStream = AnthropicStream(mockAnthropicStream, {
-     onStart: onStartMock,
-     onText: onTextMock,
-     onCompletion: onCompletionMock,
+     callbacks: {
+       onStart: onStartMock,
+       onText: onTextMock,
+       onCompletion: onCompletionMock,
+     },
    });
 
    const decoder = new TextDecoder();
@@ -165,7 +167,9 @@ describe('AnthropicStream', () => {
    const onToolCallMock = vi.fn();
 
    const protocolStream = AnthropicStream(mockReadableStream, {
-     onToolsCalling: onToolCallMock,
+     callbacks: {
+       onToolsCalling: onToolCallMock,
+     },
    });
 
    const decoder = new TextDecoder();
@@ -317,7 +321,9 @@ describe('AnthropicStream', () => {
    const onToolCallMock = vi.fn();
 
    const protocolStream = AnthropicStream(mockReadableStream, {
-     onToolsCalling: onToolCallMock,
+     callbacks: {
+       onToolsCalling: onToolCallMock,
+     },
    });
 
    const decoder = new TextDecoder();
package/src/libs/agent-runtime/utils/streams/anthropic.ts CHANGED
@@ -12,6 +12,7 @@ import {
    convertIterableToStream,
    createCallbacksTransformer,
    createSSEProtocolTransformer,
+   createTokenSpeedCalculator,
  } from './protocol';
 
  export const transformAnthropicStream = (
@@ -188,9 +189,14 @@ export const transformAnthropicStream = (
      }
    };
 
+ export interface AnthropicStreamOptions {
+   callbacks?: ChatStreamCallbacks;
+   inputStartAt?: number;
+ }
+
  export const AnthropicStream = (
    stream: Stream<Anthropic.MessageStreamEvent> | ReadableStream,
-   callbacks?: ChatStreamCallbacks,
+   { callbacks, inputStartAt }: AnthropicStreamOptions = {},
  ) => {
    const streamStack: StreamContext = { id: '' };
 
@@ -198,6 +204,9 @@ export const AnthropicStream = (
      stream instanceof ReadableStream ? stream : convertIterableToStream(stream);
 
    return readableStream
-     .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack))
+     .pipeThrough(
+       createTokenSpeedCalculator(transformAnthropicStream, { inputStartAt, streamStack }),
+     )
+     .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
      .pipeThrough(createCallbacksTransformer(callbacks));
  };
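Both the Anthropic and OpenAI stream builders now share the same three-stage pipeline: `createTokenSpeedCalculator` wraps the provider-specific transform (so it can watch the resulting protocol chunks for the first `text` chunk and the `usage` chunk), the SSE transformer then serializes the already-transformed chunks via an identity function, and the callbacks transformer runs last. A conceptual sketch with stubbed, simplified signatures:

```ts
// Conceptual sketch; signatures are stubbed with `declare` and simplified to `any`.
declare const raw: ReadableStream;
declare function transformProviderChunk(chunk: any, ctx: any): any;
declare function createTokenSpeedCalculator(
  t: (chunk: any, ctx: any) => any,
  opts: { inputStartAt?: number; streamStack?: any },
): TransformStream;
declare function createSSEProtocolTransformer(t: (chunk: any) => any, ctx: any): TransformStream;
declare function createCallbacksTransformer(cb?: unknown): TransformStream;

const streamStack = { id: '' };
const out = raw
  // stage 1: provider transform + speed measurement in a single pass
  .pipeThrough(
    createTokenSpeedCalculator(transformProviderChunk, { inputStartAt: Date.now(), streamStack }),
  )
  // stage 2: chunks are already protocol-shaped, so the SSE stage receives an identity transform
  .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
  // stage 3: user callbacks observe the serialized stream
  .pipeThrough(createCallbacksTransformer(undefined));
```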
package/src/libs/agent-runtime/utils/streams/openai.ts CHANGED
@@ -16,6 +16,7 @@ import {
    createCallbacksTransformer,
    createFirstErrorHandleTransformer,
    createSSEProtocolTransformer,
+   createTokenSpeedCalculator,
    generateToolCallId,
  } from './protocol';
 
@@ -218,12 +219,13 @@ export interface OpenAIStreamOptions {
      name: string;
    }) => ILobeAgentRuntimeErrorType | undefined;
    callbacks?: ChatStreamCallbacks;
+   inputStartAt?: number;
    provider?: string;
  }
 
  export const OpenAIStream = (
    stream: Stream<OpenAI.ChatCompletionChunk> | ReadableStream,
-   { callbacks, provider, bizErrorTypeTransformer }: OpenAIStreamOptions = {},
+   { callbacks, provider, bizErrorTypeTransformer, inputStartAt }: OpenAIStreamOptions = {},
  ) => {
    const streamStack: StreamContext = { id: '' };
 
@@ -236,7 +238,8 @@ export const OpenAIStream = (
      // provider like huggingface or minimax will return error in the stream,
      // so in the first Transformer, we need to handle the error
      .pipeThrough(createFirstErrorHandleTransformer(bizErrorTypeTransformer, provider))
-     .pipeThrough(createSSEProtocolTransformer(transformOpenAIStream, streamStack))
+     .pipeThrough(createTokenSpeedCalculator(transformOpenAIStream, { inputStartAt, streamStack }))
+     .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
      .pipeThrough(createCallbacksTransformer(callbacks))
  );
  };
package/src/libs/agent-runtime/utils/streams/protocol.test.ts CHANGED
@@ -1,6 +1,6 @@
  import { describe, expect, it } from 'vitest';
 
- import { createSSEDataExtractor } from './protocol';
+ import { createSSEDataExtractor, createTokenSpeedCalculator } from './protocol';
 
  describe('createSSEDataExtractor', () => {
    // Helper function to convert string to Uint8Array
@@ -135,3 +135,69 @@ describe('createSSEDataExtractor', () => {
      });
    });
  });
+
+ describe('createTokenSpeedCalculator', async () => {
+   // Mock the param from caller - 1000 to avoid div 0
+   const inputStartAt = Date.now() - 1000;
+
+   // Helper function to process chunks through transformer
+   const processChunk = async (transformer: TransformStream, chunk: any) => {
+     const results: any[] = [];
+     const readable = new ReadableStream({
+       start(controller) {
+         controller.enqueue(chunk);
+         controller.close();
+       },
+     });
+
+     const writable = new WritableStream({
+       write(chunk) {
+         results.push(chunk);
+       },
+     });
+
+     await readable.pipeThrough(transformer).pipeTo(writable);
+
+     return results;
+   };
+
+   it('should calculate token speed correctly', async () => {
+     const chunks = [
+       { data: '', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'hi', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'stop', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'stop' },
+       {
+         data: {
+           inputTextTokens: 9,
+           outputTextTokens: 1,
+           totalInputTokens: 9,
+           totalOutputTokens: 1,
+           totalTokens: 10,
+         },
+         id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy',
+         type: 'usage',
+       },
+     ];
+
+     const transformer = createTokenSpeedCalculator((v) => v, { inputStartAt });
+     const results = await processChunk(transformer, chunks);
+     expect(results).toHaveLength(chunks.length + 1);
+     const speedChunk = results.slice(-1)[0];
+     expect(speedChunk.id).toBe('output_speed');
+     expect(speedChunk.type).toBe('speed');
+     expect(speedChunk.data.tps).not.toBeNaN();
+     expect(speedChunk.data.ttft).not.toBeNaN();
+   });
+
+   it('should not calculate token speed if no usage', async () => {
+     const chunks = [
+       { data: '', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'hi', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'stop', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'stop' },
+     ];
+
+     const transformer = createTokenSpeedCalculator((v) => v, { inputStartAt });
+     const results = await processChunk(transformer, chunks);
+     expect(results).toHaveLength(chunks.length);
+   });
+ });
package/src/libs/agent-runtime/utils/streams/protocol.ts CHANGED
@@ -1,4 +1,4 @@
- import { ModelTokensUsage } from '@/types/message';
+ import { ModelSpeed, ModelTokensUsage } from '@/types/message';
  import { safeParseJSON } from '@/utils/safeParseJSON';
 
  import { AgentRuntimeErrorType } from '../../error';
@@ -52,6 +52,8 @@ export interface StreamProtocolChunk {
    | 'error'
    // token usage
    | 'usage'
+   // performance monitor
+   | 'speed'
    // unknown data result
    | 'data';
  }
@@ -287,3 +289,46 @@ export const createSSEDataExtractor = () =>
      }
    },
  });
+
+ export const TOKEN_SPEED_CHUNK_ID = 'output_speed';
+
+ /**
+  * Create a middleware to calculate the token generate speed
+  * @requires createSSEProtocolTransformer
+  */
+ export const createTokenSpeedCalculator = (
+   transformer: (chunk: any, stack: StreamContext) => StreamProtocolChunk | StreamProtocolChunk[],
+   { streamStack, inputStartAt }: { inputStartAt?: number; streamStack?: StreamContext } = {},
+ ) => {
+   let outputStartAt: number | undefined;
+
+   const process = (chunk: StreamProtocolChunk) => {
+     let result = [chunk];
+     // if the chunk is the first text chunk, set as output start
+     if (!outputStartAt && chunk.type === 'text') outputStartAt = Date.now();
+     // if the chunk is the stop chunk, set as output finish
+     if (inputStartAt && outputStartAt && chunk.type === 'usage') {
+       const outputTokens = chunk.data?.totalOutputTokens || chunk.data?.outputTextTokens;
+       result.push({
+         data: {
+           tps: (outputTokens / (Date.now() - outputStartAt)) * 1000,
+           ttft: outputStartAt - inputStartAt,
+         } as ModelSpeed,
+         id: TOKEN_SPEED_CHUNK_ID,
+         type: 'speed',
+       });
+     }
+     return result;
+   };
+
+   return new TransformStream({
+     transform(chunk, controller) {
+       let result = transformer(chunk, streamStack || { id: '' });
+       if (!Array.isArray(result)) result = [result];
+       result.forEach((r) => {
+         const processed = process(r);
+         if (processed) processed.forEach((p) => controller.enqueue(p));
+       });
+     },
+   });
+ };
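The metrics themselves are simple arithmetic over three timestamps: `inputStartAt` (request dispatched), `outputStartAt` (first `text` chunk observed), and the moment the `usage` chunk arrives. A worked example in TypeScript with invented timings:

```ts
// Worked example of the speed math above (all times in ms, values invented).
const inputStartAt = 1_000; // request dispatched
const outputStartAt = 1_400; // first 'text' chunk observed
const usageArrivedAt = 3_400; // 'usage' chunk observed
const outputTokens = 100; // totalOutputTokens from the usage chunk

const ttft = outputStartAt - inputStartAt; // 400 ms to first token
const tps = (outputTokens / (usageArrivedAt - outputStartAt)) * 1000; // 100 / 2000 * 1000 = 50 tokens/s

console.log({ tps, ttft }); // { tps: 50, ttft: 400 }
```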
package/src/locales/default/chat.ts CHANGED
@@ -122,6 +122,17 @@ export default {
    outputText: '文本输出',
    outputTitle: '输出明细',
    reasoning: '深度思考',
+   speed: {
+     tps: {
+       title: 'TPS',
+       tooltip:
+         'Tokens Per Second,TPS。指AI生成内容的平均速度(Token/秒),在接收到首个 Token 后开始计算。',
+     },
+     ttft: {
+       title: 'TTFT',
+       tooltip: 'Time To First Token,TTFT。指从您发送消息到客户端接收到首个 Token 的时间间隔。',
+     },
+   },
    title: '生成明细',
    total: '总计消耗',
  },
package/src/store/chat/slices/aiChat/actions/generateAIChat.ts CHANGED
@@ -576,7 +576,7 @@ export const generateAIChat: StateCreator<
    },
    onFinish: async (
      content,
-     { traceId, observationId, toolCalls, reasoning, grounding, usage },
+     { traceId, observationId, toolCalls, reasoning, grounding, usage, speed },
    ) => {
      // if there is traceId, update it
      if (traceId) {
@@ -611,8 +611,8 @@ export const generateAIChat: StateCreator<
      toolCalls,
      reasoning: !!reasoning ? { ...reasoning, duration } : undefined,
      search: !!grounding?.citations ? grounding : undefined,
-     metadata: usage,
      imageList: finalImages.length > 0 ? finalImages : undefined,
+     metadata: speed ? { ...usage, ...speed } : usage,
    });
  },
  onMessageHandle: async (chunk) => {
package/src/types/message/base.ts CHANGED
@@ -44,8 +44,16 @@ export interface ModelTokensUsage {
    totalTokens?: number;
  }
 
+ export interface ModelSpeed {
+   // tokens per second
+   tps?: number;
+   // time to first token
+   ttft?: number;
+ }
+
  export interface MessageMetadata extends ModelTokensUsage {
    tps?: number;
+   ttft?: number;
  }
 
  export type MessageRoleType = 'user' | 'system' | 'assistant' | 'tool';
package/src/utils/fetch/fetchSSE.ts CHANGED
@@ -11,6 +11,7 @@ import {
    MessageToolCallChunk,
    MessageToolCallSchema,
    ModelReasoning,
+   ModelSpeed,
    ModelTokensUsage,
  } from '@/types/message';
  import { ChatImageChunk } from '@/types/message/image';
@@ -29,6 +30,7 @@ export type OnFinishHandler = (
    images?: ChatImageChunk[];
    observationId?: string | null;
    reasoning?: ModelReasoning;
+   speed?: ModelSpeed;
    toolCalls?: MessageToolCall[];
    traceId?: string | null;
    type?: SSEFinishType;
@@ -41,6 +43,11 @@ export interface MessageUsageChunk {
    usage: ModelTokensUsage;
  }
 
+ export interface MessageSpeedChunk {
+   speed: ModelSpeed;
+   type: 'speed';
+ }
+
  export interface MessageTextChunk {
    text: string;
    type: 'text';
@@ -82,7 +89,8 @@ export interface FetchSSEOptions {
      | MessageReasoningChunk
      | MessageGroundingChunk
      | MessageUsageChunk
-     | MessageBase64ImageChunk,
+     | MessageBase64ImageChunk
+     | MessageSpeedChunk,
    ) => void;
    smoothing?: SmoothingParams | boolean;
  }
@@ -342,6 +350,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
    let grounding: GroundingSearch | undefined = undefined;
    let usage: ModelTokensUsage | undefined = undefined;
    let images: ChatImageChunk[] = [];
+   let speed: ModelSpeed | undefined = undefined;
 
    await fetchEventSource(url, {
      body: options.body,
@@ -433,6 +442,12 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
          break;
        }
 
+       case 'speed': {
+         speed = data;
+         options.onMessageHandle?.({ speed: data, type: 'speed' });
+         break;
+       }
+
        case 'grounding': {
          grounding = data;
          options.onMessageHandle?.({ grounding: data, type: 'grounding' });
@@ -517,6 +532,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
      images: images.length > 0 ? images : undefined,
      observationId,
      reasoning: !!thinking ? { content: thinking, signature: thinkingSignature } : undefined,
+     speed,
      toolCalls,
      traceId,
      type: finishedType,
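End to end, a `speed` chunk emitted by the runtime reaches consumers twice: incrementally via `onMessageHandle`, and at completion via `onFinish`, where the store merges it into the persisted metadata (`metadata: speed ? { ...usage, ...speed } : usage`, as in `generateAIChat` above). A hypothetical consumer sketch, assuming the option shapes in this diff; the URL and handler bodies are illustrative:

```ts
// Hypothetical fetchSSE consumer; '/api/chat' and the handlers are illustrative.
await fetchSSE('/api/chat', {
  method: 'POST',
  onMessageHandle: (chunk) => {
    if (chunk.type === 'speed') {
      console.log('tps:', chunk.speed.tps, 'ttft(ms):', chunk.speed.ttft);
    }
  },
  onFinish: async (content, { usage, speed }) => {
    // mirror the store logic: fold speed into the persisted metadata
    const metadata = speed ? { ...usage, ...speed } : usage;
    console.log(content, metadata);
  },
});
```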