@lobehub/chat 1.81.6 → 1.81.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +50 -0
  2. package/changelog/v1.json +18 -0
  3. package/locales/ar/chat.json +10 -0
  4. package/locales/bg-BG/chat.json +10 -0
  5. package/locales/de-DE/chat.json +10 -0
  6. package/locales/en-US/chat.json +10 -0
  7. package/locales/es-ES/chat.json +10 -0
  8. package/locales/fa-IR/chat.json +10 -0
  9. package/locales/fr-FR/chat.json +10 -0
  10. package/locales/it-IT/chat.json +10 -0
  11. package/locales/ja-JP/chat.json +10 -0
  12. package/locales/ko-KR/chat.json +10 -0
  13. package/locales/nl-NL/chat.json +10 -0
  14. package/locales/pl-PL/chat.json +10 -0
  15. package/locales/pt-BR/chat.json +10 -0
  16. package/locales/ru-RU/chat.json +10 -0
  17. package/locales/tr-TR/chat.json +10 -0
  18. package/locales/vi-VN/chat.json +10 -0
  19. package/locales/zh-CN/chat.json +10 -0
  20. package/locales/zh-TW/chat.json +10 -0
  21. package/package.json +1 -1
  22. package/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx +31 -4
  23. package/src/features/Conversation/Extras/Usage/index.tsx +1 -1
  24. package/src/libs/agent-runtime/anthropic/index.ts +7 -3
  25. package/src/libs/agent-runtime/perplexity/index.test.ts +4 -1
  26. package/src/libs/agent-runtime/togetherai/index.ts +19 -23
  27. package/src/libs/agent-runtime/togetherai/type.ts +2 -2
  28. package/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +17 -8
  29. package/src/libs/agent-runtime/utils/streams/anthropic.test.ts +11 -5
  30. package/src/libs/agent-runtime/utils/streams/anthropic.ts +11 -2
  31. package/src/libs/agent-runtime/utils/streams/openai.ts +5 -2
  32. package/src/libs/agent-runtime/utils/streams/protocol.test.ts +67 -1
  33. package/src/libs/agent-runtime/utils/streams/protocol.ts +46 -1
  34. package/src/locales/default/chat.ts +11 -0
  35. package/src/store/chat/slices/aiChat/actions/generateAIChat.ts +2 -2
  36. package/src/types/message/base.ts +8 -0
  37. package/src/utils/fetch/fetchSSE.ts +17 -1
package/CHANGELOG.md CHANGED
@@ -2,6 +2,56 @@
 
  # Changelog
 
+ ### [Version 1.81.8](https://github.com/lobehub/lobe-chat/compare/v1.81.7...v1.81.8)
+
+ <sup>Released on **2025-04-21**</sup>
+
+ #### 💄 Styles
+
+ - **misc**: Show token generate performance.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### Styles
+
+ - **misc**: Show token generate performance, closes [#6959](https://github.com/lobehub/lobe-chat/issues/6959) ([33c3fe7](https://github.com/lobehub/lobe-chat/commit/33c3fe7))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
+ ### [Version 1.81.7](https://github.com/lobehub/lobe-chat/compare/v1.81.6...v1.81.7)
+
+ <sup>Released on **2025-04-21**</sup>
+
+ #### 🐛 Bug Fixes
+
+ - **misc**: Together.ai fetch model list.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's fixed
+
+ - **misc**: Together.ai fetch model list, closes [#7498](https://github.com/lobehub/lobe-chat/issues/7498) ([bd797b8](https://github.com/lobehub/lobe-chat/commit/bd797b8))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ### [Version 1.81.6](https://github.com/lobehub/lobe-chat/compare/v1.81.5...v1.81.6)
 
  <sup>Released on **2025-04-21**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,22 @@
  [
+   {
+     "children": {
+       "improvements": [
+         "Show token generate performance."
+       ]
+     },
+     "date": "2025-04-21",
+     "version": "1.81.8"
+   },
+   {
+     "children": {
+       "fixes": [
+         "Together.ai fetch model list."
+       ]
+     },
+     "date": "2025-04-21",
+     "version": "1.81.7"
+   },
    {
      "children": {
        "improvements": [
package/locales/ar/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "مخرجات نصية",
    "outputTitle": "تفاصيل المخرجات",
    "reasoning": "تفكير عميق",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "عدد الرموز في الثانية، TPS. يشير إلى متوسط سرعة توليد المحتوى بواسطة الذكاء الاصطناعي (رمز/ثانية)، ويبدأ الحساب عند استلام أول رمز."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "الوقت حتى أول رمز، TTFT. يشير إلى الفارق الزمني من لحظة إرسال الرسالة حتى استلام أول رمز في العميل."
+     }
+   },
    "title": "تفاصيل التوليد",
    "total": "الإجمالي المستهلك"
  }
package/locales/bg-BG/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Текстов изход",
    "outputTitle": "Детайли за изхода",
    "reasoning": "Дълбочинно разсъждение",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Токени на секунда, TPS. Отнася се до средната скорост на генериране на съдържание от AI (Токен/секунда), започвайки да се изчислява след получаване на първия токен."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Време до първия токен, TTFT. Отнася се до времевия интервал от изпращането на съобщението до получаването на първия токен от клиента."
+     }
+   },
    "title": "Детайли за генериране",
    "total": "Общо разходи"
  }
package/locales/de-DE/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Text-Ausgabe",
    "outputTitle": "Ausgabedetails",
    "reasoning": "Tiefes Denken",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens pro Sekunde, TPS. Bezieht sich auf die durchschnittliche Geschwindigkeit, mit der AI-Inhalte generiert werden (Token/Sekunde), und beginnt mit der Berechnung, nachdem das erste Token empfangen wurde."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Zeit bis zum ersten Token, TTFT. Bezieht sich auf das Zeitintervall von dem Moment, in dem Sie eine Nachricht senden, bis der Client das erste Token erhält."
+     }
+   },
    "title": "Generierungsdetails",
    "total": "Gesamter Verbrauch"
  }
package/locales/en-US/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Text Output",
    "outputTitle": "Output Details",
    "reasoning": "Deep Thinking",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Per Second (TPS). This indicates the average speed of AI-generated content (Tokens/second), calculated from the moment the first Token is received."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token (TTFT). This refers to the time interval from when you send a message to when the client receives the first Token."
+     }
+   },
    "title": "Generation Details",
    "total": "Total Consumption"
  }
package/locales/es-ES/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Salida de texto",
    "outputTitle": "Detalles de salida",
    "reasoning": "Razonamiento profundo",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Por Segundo, TPS. Se refiere a la velocidad promedio de generación de contenido por la IA (Token/segundo), comenzando a contar desde que se recibe el primer Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Tiempo Hasta el Primer Token, TTFT. Se refiere al intervalo de tiempo desde que envía un mensaje hasta que el cliente recibe el primer Token."
+     }
+   },
    "title": "Detalles de generación",
    "total": "Total consumido"
  }
package/locales/fa-IR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "خروجی متنی",
    "outputTitle": "جزئیات خروجی",
    "reasoning": "تفکر عمیق",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "تعداد توکن در ثانیه، TPS. به میانگین سرعت تولید محتوای AI اشاره دارد (توکن/ثانیه) و از زمان دریافت اولین توکن شروع به محاسبه می‌شود."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "زمان تا اولین توکن، TTFT. به فاصله زمانی بین ارسال پیام شما و دریافت اولین توکن توسط کلاینت اشاره دارد."
+     }
+   },
    "title": "جزئیات تولید",
    "total": "مجموع مصرف"
  }
package/locales/fr-FR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Sortie texte",
    "outputTitle": "Détails de la sortie",
    "reasoning": "Raisonnement approfondi",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Par Seconde, TPS. Indique la vitesse moyenne de génération de contenu par l'IA (Token/seconde), calculée à partir de la réception du premier Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Temps Jusqu'au Premier Token, TTFT. Indique l'intervalle de temps entre l'envoi de votre message et la réception du premier Token par le client."
+     }
+   },
    "title": "Détails de génération",
    "total": "Total consommé"
  }
package/locales/it-IT/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Output testo",
    "outputTitle": "Dettagli output",
    "reasoning": "Ragionamento profondo",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Token Per Second, TPS. Indica la velocità media di generazione dei contenuti da parte dell'AI (Token/secondo), calcolata a partire dalla ricezione del primo Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token, TTFT. Indica l'intervallo di tempo che intercorre tra l'invio del messaggio e la ricezione del primo Token da parte del client."
+     }
+   },
    "title": "Dettagli generati",
    "total": "Totale consumato"
  }
package/locales/ja-JP/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "テキスト出力",
    "outputTitle": "出力の詳細",
    "reasoning": "深い思考",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "トークン毎秒(TPS)。AIが生成するコンテンツの平均速度(トークン/秒)を示し、最初のトークンを受信した時点から計算が始まります。"
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "最初のトークンまでの時間(TTFT)。メッセージを送信してからクライアントが最初のトークンを受信するまでの時間間隔を示します。"
+     }
+   },
    "title": "生成の詳細",
    "total": "合計消費"
  }
package/locales/ko-KR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "텍스트 출력",
    "outputTitle": "출력 세부사항",
    "reasoning": "심층 사고",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "초당 토큰 수(TPS). AI가 생성한 콘텐츠의 평균 속도(토큰/초)를 나타내며, 첫 번째 토큰을 수신한 후부터 계산됩니다."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "첫 번째 토큰까지의 시간(Time To First Token, TTFT). 메시지를 전송한 시점부터 클라이언트가 첫 번째 토큰을 수신할 때까지의 시간 간격을 나타냅니다."
+     }
+   },
    "title": "생성 세부사항",
    "total": "총 소모"
  }
package/locales/nl-NL/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Tekstuitvoer",
    "outputTitle": "Uitvoerdetails",
    "reasoning": "Diep nadenken",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Per Second, TPS. Dit verwijst naar de gemiddelde snelheid van AI-gegenereerde inhoud (Token/seconde), die begint te tellen vanaf het moment dat het eerste Token is ontvangen."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token, TTFT. Dit verwijst naar de tijdsduur van het moment dat u een bericht verzendt tot het moment dat de client het eerste Token ontvangt."
+     }
+   },
    "title": "Genereren van details",
    "total": "Totaal verbruik"
  }
package/locales/pl-PL/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Wyjście tekstowe",
    "outputTitle": "Szczegóły wyjścia",
    "reasoning": "Głębokie myślenie",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokeny na sekundę, TPS. Oznacza średnią prędkość generowania treści przez AI (Tokeny/sekundę), obliczaną od momentu otrzymania pierwszego Tokena."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Czas do pierwszego tokena, TTFT. Oznacza czas od momentu wysłania wiadomości do momentu, w którym klient otrzymuje pierwszy Token."
+     }
+   },
    "title": "Szczegóły generacji",
    "total": "Całkowite zużycie"
  }
package/locales/pt-BR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Saída de texto",
    "outputTitle": "Detalhes da saída",
    "reasoning": "Raciocínio profundo",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Por Segundo, TPS. Refere-se à velocidade média de geração de conteúdo pela IA (Token/segundo), começando a contagem após o recebimento do primeiro Token."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Tempo Para o Primeiro Token, TTFT. Refere-se ao intervalo de tempo desde que você envia a mensagem até que o cliente receba o primeiro Token."
+     }
+   },
    "title": "Detalhes da geração",
    "total": "Total consumido"
  }
package/locales/ru-RU/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Текстовый выход",
    "outputTitle": "Детали выхода",
    "reasoning": "Глубокое мышление",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Токены в секунду (TPS). Средняя скорость генерации контента ИИ (Токенов/сек), начинается с момента получения первого токена."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Время до первого токена (TTFT). Время, прошедшее с момента отправки вами сообщения до получения первого токена клиентом."
+     }
+   },
    "title": "Детали генерации",
    "total": "Общее потребление"
  }
package/locales/tr-TR/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Metin çıkışı",
    "outputTitle": "Çıkış detayları",
    "reasoning": "Derin düşünme",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Token Başına Saniye, TPS. AI tarafından üretilen içeriğin ortalama hızını (Token/saniye) belirtir, ilk Token alındıktan sonra hesaplanmaya başlanır."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "İlk Token'a Kadar Geçen Süre, TTFT. Mesajınızı göndermenizden, istemcinin ilk Token'ı almasına kadar geçen zaman aralığını belirtir."
+     }
+   },
    "title": "Üretim detayları",
    "total": "Toplam tüketim"
  }
package/locales/vi-VN/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "Văn bản xuất",
    "outputTitle": "Chi tiết xuất",
    "reasoning": "Suy nghĩ sâu sắc",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Mỗi Giây, TPS. Chỉ tốc độ trung bình của nội dung do AI tạo ra (Token/giây), bắt đầu tính từ khi nhận được Token đầu tiên."
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Thời Gian Đến Token Đầu Tiên, TTFT. Chỉ khoảng thời gian từ khi bạn gửi tin nhắn đến khi khách hàng nhận được Token đầu tiên."
+     }
+   },
    "title": "Chi tiết tạo ra",
    "total": "Tổng tiêu thụ"
  }
package/locales/zh-CN/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "文本输出",
    "outputTitle": "输出明细",
    "reasoning": "深度思考",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "Tokens Per Second,TPS。指AI生成内容的平均速度(Token/秒),在接收到首个 Token 后开始计算。"
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "Time To First Token,TTFT。指从您发送消息到客户端接收到首个 Token 的时间间隔。"
+     }
+   },
    "title": "生成明细",
    "total": "总计消耗"
  }
package/locales/zh-TW/chat.json CHANGED
@@ -119,6 +119,16 @@
    "outputText": "文本輸出",
    "outputTitle": "輸出明細",
    "reasoning": "深度思考",
+   "speed": {
+     "tps": {
+       "title": "TPS",
+       "tooltip": "每秒令牌數,TPS。指AI生成內容的平均速度(Token/秒),在接收到首個令牌後開始計算。"
+     },
+     "ttft": {
+       "title": "TTFT",
+       "tooltip": "首次令牌時間,TTFT。指從您發送消息到客戶端接收到首個令牌的時間間隔。"
+     }
+   },
    "title": "生成明細",
    "total": "總計消耗"
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@lobehub/chat",
-   "version": "1.81.6",
+   "version": "1.81.8",
    "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
    "keywords": [
      "framework",
package/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx CHANGED
@@ -6,10 +6,11 @@ import { memo } from 'react';
  import { useTranslation } from 'react-i18next';
  import { Center, Flexbox } from 'react-layout-kit';
 
+ import InfoTooltip from '@/components/InfoTooltip';
  import { aiModelSelectors, useAiInfraStore } from '@/store/aiInfra';
  import { useGlobalStore } from '@/store/global';
  import { systemStatusSelectors } from '@/store/global/selectors';
- import { ModelTokensUsage } from '@/types/message';
+ import { MessageMetadata } from '@/types/message';
  import { formatNumber } from '@/utils/format';
 
  import ModelCard from './ModelCard';
@@ -17,19 +18,19 @@ import TokenProgress, { TokenProgressItem } from './TokenProgress';
  import { getDetailsToken } from './tokens';
 
  interface TokenDetailProps {
+   meta: MessageMetadata;
    model: string;
    provider: string;
-   usage: ModelTokensUsage;
  }
 
- const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
+ const TokenDetail = memo<TokenDetailProps>(({ meta, model, provider }) => {
    const { t } = useTranslation('chat');
    const theme = useTheme();
 
    const modelCard = useAiInfraStore(aiModelSelectors.getModelCard(model, provider));
    const isShowCredit = useGlobalStore(systemStatusSelectors.isShowCredit) && !!modelCard?.pricing;
 
-   const detailTokens = getDetailsToken(usage, modelCard);
+   const detailTokens = getDetailsToken(meta, modelCard);
    const inputDetails = [
      !!detailTokens.inputAudio && {
        color: theme.cyan9,
@@ -113,6 +114,10 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
      detailTokens.totalTokens!.credit / detailTokens.totalTokens!.token,
      2,
    );
+
+   const tps = meta?.tps ? formatNumber(meta.tps, 2) : undefined;
+   const ttft = meta?.ttft ? formatNumber(meta.ttft / 1000, 2) : undefined;
+
    return (
      <Popover
        arrow={false}
@@ -170,6 +175,28 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
            <div style={{ fontWeight: 500 }}>{averagePricing}</div>
          </Flexbox>
        )}
+       {tps && (
+         <Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
+           <Flexbox gap={8} horizontal>
+             <div style={{ color: theme.colorTextSecondary }}>
+               {t('messages.tokenDetails.speed.tps.title')}
+             </div>
+             <InfoTooltip title={t('messages.tokenDetails.speed.tps.tooltip')} />
+           </Flexbox>
+           <div style={{ fontWeight: 500 }}>{tps}</div>
+         </Flexbox>
+       )}
+       {ttft && (
+         <Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
+           <Flexbox gap={8} horizontal>
+             <div style={{ color: theme.colorTextSecondary }}>
+               {t('messages.tokenDetails.speed.ttft.title')}
+             </div>
+             <InfoTooltip title={t('messages.tokenDetails.speed.ttft.tooltip')} />
+           </Flexbox>
+           <div style={{ fontWeight: 500 }}>{ttft}s</div>
+         </Flexbox>
+       )}
      </Flexbox>
    </Flexbox>
  </Flexbox>
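For context on the UI change above: the component renders TPS as a bare number and converts TTFT from milliseconds to seconds before display. A minimal TypeScript sketch of that formatting logic, using plain `toFixed` in place of the repo's `formatNumber` helper, with `SpeedMeta` as a stand-in for `MessageMetadata`:

```ts
// Sketch only: mirrors how the new speed rows derive their display strings.
interface SpeedMeta {
  tps?: number; // tokens per second
  ttft?: number; // time to first token, in milliseconds
}

const toSpeedDisplay = (meta: SpeedMeta) => ({
  tps: meta.tps ? meta.tps.toFixed(2) : undefined,
  // ttft arrives in ms and is rendered in seconds, e.g. "0.42s"
  ttft: meta.ttft ? `${(meta.ttft / 1000).toFixed(2)}s` : undefined,
});

// toSpeedDisplay({ tps: 53.127, ttft: 420 }) → { tps: '53.13', ttft: '0.42s' }
```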
package/src/features/Conversation/Extras/Usage/index.tsx CHANGED
@@ -31,7 +31,7 @@ const Usage = memo<UsageProps>(({ model, metadata, provider }) => {
      </Center>
 
      {!!metadata.totalTokens && (
-       <TokenDetail model={model as string} provider={provider} usage={metadata} />
+       <TokenDetail meta={metadata} model={model as string} provider={provider} />
      )}
    </Flexbox>
  );
package/src/libs/agent-runtime/anthropic/index.ts CHANGED
@@ -56,6 +56,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
    async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
      try {
        const anthropicPayload = await this.buildAnthropicPayload(payload);
+       const inputStartAt = Date.now();
 
        if (this.isDebug()) {
          console.log('[requestPayload]');
@@ -79,9 +80,12 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
          debugStream(debug.toReadableStream()).catch(console.error);
        }
 
-       return StreamingResponse(AnthropicStream(prod, options?.callback), {
-         headers: options?.headers,
-       });
+       return StreamingResponse(
+         AnthropicStream(prod, { callbacks: options?.callback, inputStartAt }),
+         {
+           headers: options?.headers,
+         },
+       );
      } catch (error) {
        throw this.handleError(error);
      }
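The hunk above changes `AnthropicStream`'s second parameter from a bare callbacks object to an options object, so the provider can thread the request start timestamp through to the speed calculator. A sketch of the call-site migration (stubbed placeholders, not runtime code):

```ts
// Call-shape migration sketch; `stream` and `callbacks` are placeholders.
declare const stream: ReadableStream;
declare const callbacks: Record<string, unknown> | undefined;
declare function AnthropicStream(
  s: ReadableStream,
  o?: { callbacks?: unknown; inputStartAt?: number },
): ReadableStream;

// Before (v1.81.6): AnthropicStream(stream, callbacks)
// After (v1.81.8):
const inputStartAt = Date.now(); // captured before the request is issued
const piped = AnthropicStream(stream, { callbacks, inputStartAt });
```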
package/src/libs/agent-runtime/perplexity/index.test.ts CHANGED
@@ -225,7 +225,10 @@ describe('LobePerplexityAI', () => {
        stream.push(decoder.decode(value));
      }
 
-     expect(stream).toEqual(
+     // Slice out speed chunk
+     const noSpeedStream = stream.slice(0, -3);
+
+     expect(noSpeedStream).toEqual(
        [
          'id: 506d64fb-e7f2-4d94-b80f-158369e9446d',
          'event: text',
package/src/libs/agent-runtime/togetherai/index.ts CHANGED
@@ -1,9 +1,9 @@
+ import type { ChatModelCard } from '@/types/llm';
+
  import { ModelProvider } from '../types';
  import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
  import { TogetherAIModel } from './type';
 
- import type { ChatModelCard } from '@/types/llm';
-
  export const LobeTogetherAI = LobeOpenAICompatibleFactory({
    baseURL: 'https://api.together.xyz/v1',
    constructorOptions: {
@@ -18,24 +18,20 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
    models: async ({ client }) => {
      const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');
 
-     const visionKeywords = [
-       'qvq',
-       'vision',
-     ];
+     const visionKeywords = ['qvq', 'vision'];
 
-     const reasoningKeywords = [
-       'deepseek-r1',
-       'qwq',
-     ];
+     const reasoningKeywords = ['deepseek-r1', 'qwq'];
 
      client.baseURL = 'https://api.together.xyz/api';
 
-     const modelsPage = await client.models.list() as any;
+     const modelsPage = (await client.models.list()) as any;
      const modelList: TogetherAIModel[] = modelsPage.body;
 
      return modelList
        .map((model) => {
-         const knownModel = LOBE_DEFAULT_MODEL_LIST.find((m) => model.name.toLowerCase() === m.id.toLowerCase());
+         const knownModel = LOBE_DEFAULT_MODEL_LIST.find(
+           (m) => model.id.toLowerCase() === m.id.toLowerCase(),
+         );
 
          return {
            contextWindowTokens: knownModel?.contextWindowTokens ?? undefined,
@@ -43,21 +39,21 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
            displayName: model.display_name,
            enabled: knownModel?.enabled || false,
            functionCall:
-             model.description?.toLowerCase().includes('function calling')
-             || knownModel?.abilities?.functionCall
-             || false,
-           id: model.name,
+             model.description?.toLowerCase().includes('function calling') ||
+             knownModel?.abilities?.functionCall ||
+             false,
+           id: model.id,
            maxOutput: model.context_length,
            reasoning:
-             reasoningKeywords.some(keyword => model.name.toLowerCase().includes(keyword))
-             || knownModel?.abilities?.functionCall
-             || false,
+             reasoningKeywords.some((keyword) => model.id.toLowerCase().includes(keyword)) ||
+             knownModel?.abilities?.functionCall ||
+             false,
            tokens: model.context_length,
            vision:
-             model.description?.toLowerCase().includes('vision')
-             || visionKeywords.some(keyword => model.name?.toLowerCase().includes(keyword))
-             || knownModel?.abilities?.functionCall
-             || false,
+             model.description?.toLowerCase().includes('vision') ||
+             visionKeywords.some((keyword) => model.id?.toLowerCase().includes(keyword)) ||
+             knownModel?.abilities?.functionCall ||
+             false,
          };
        })
        .filter(Boolean) as ChatModelCard[];
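The v1.81.7 fix above tracks a change in Together.ai's `/api/models` response: models are identified by an `id` field, and the previously used `name` field no longer exists (see the `TogetherAIModel` type change below), which is what broke model-list fetching in #7498. A hypothetical entry illustrating the shape the mapper now consumes (field names follow the updated type; the values are invented):

```ts
// Hypothetical /api/models entry; only fields the mapper reads are shown.
const exampleModel = {
  context_length: 131_072,
  description: 'A reasoning model with function calling support',
  display_name: 'DeepSeek R1',
  id: 'deepseek-ai/DeepSeek-R1', // formerly exposed as `name`, now `id`
};
```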
package/src/libs/agent-runtime/togetherai/type.ts CHANGED
@@ -50,7 +50,8 @@ interface Depth {
  }
 
  export interface TogetherAIModel {
-   _id: string;
+   id: string;
+   // eslint-disable-next-line typescript-sort-keys/interface
    access: string;
    config: Config;
    context_length: number;
@@ -67,7 +68,6 @@ export interface TogetherAIModel {
    license: string;
    link: string;
    modelInstanceConfig: ModelInstanceConfig;
-   name: string;
    num_parameters: number;
    pricing: Pricing;
    show_in_playground: boolean;
package/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts CHANGED
@@ -201,6 +201,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
 
    async chat({ responseMode, ...payload }: ChatStreamPayload, options?: ChatCompetitionOptions) {
      try {
+       const inputStartAt = Date.now();
        const postPayload = chatCompletion?.handlePayload
          ? chatCompletion.handlePayload(payload, this._options)
          : ({
@@ -253,10 +254,14 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
          debugStream(useForDebugStream).catch(console.error);
        }
 
-       const streamHandler = chatCompletion?.handleStream || OpenAIStream;
-       return StreamingResponse(streamHandler(prod, streamOptions), {
-         headers: options?.headers,
-       });
+       return StreamingResponse(
+         chatCompletion?.handleStream
+           ? chatCompletion.handleStream(prod, streamOptions.callbacks)
+           : OpenAIStream(prod, { ...streamOptions, inputStartAt }),
+         {
+           headers: options?.headers,
+         },
+       );
      }
 
      if (debug?.chatCompletion?.()) {
@@ -269,10 +274,14 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
          chatCompletion?.handleTransformResponseToStream || transformResponseToStream;
        const stream = transformHandler(response as unknown as OpenAI.ChatCompletion);
 
-       const streamHandler = chatCompletion?.handleStream || OpenAIStream;
-       return StreamingResponse(streamHandler(stream, streamOptions), {
-         headers: options?.headers,
-       });
+       return StreamingResponse(
+         chatCompletion?.handleStream
+           ? chatCompletion.handleStream(stream, streamOptions.callbacks)
+           : OpenAIStream(stream, { ...streamOptions, inputStartAt }),
+         {
+           headers: options?.headers,
+         },
+       );
      } catch (error) {
        throw this.handleError(error);
      }
package/src/libs/agent-runtime/utils/streams/anthropic.test.ts CHANGED
@@ -61,9 +61,11 @@ describe('AnthropicStream', () => {
    const onCompletionMock = vi.fn();
 
    const protocolStream = AnthropicStream(mockAnthropicStream, {
-     onStart: onStartMock,
-     onText: onTextMock,
-     onCompletion: onCompletionMock,
+     callbacks: {
+       onStart: onStartMock,
+       onText: onTextMock,
+       onCompletion: onCompletionMock,
+     },
    });
 
    const decoder = new TextDecoder();
@@ -165,7 +167,9 @@ describe('AnthropicStream', () => {
    const onToolCallMock = vi.fn();
 
    const protocolStream = AnthropicStream(mockReadableStream, {
-     onToolsCalling: onToolCallMock,
+     callbacks: {
+       onToolsCalling: onToolCallMock,
+     },
    });
 
    const decoder = new TextDecoder();
@@ -317,7 +321,9 @@ describe('AnthropicStream', () => {
    const onToolCallMock = vi.fn();
 
    const protocolStream = AnthropicStream(mockReadableStream, {
-     onToolsCalling: onToolCallMock,
+     callbacks: {
+       onToolsCalling: onToolCallMock,
+     },
    });
 
    const decoder = new TextDecoder();
package/src/libs/agent-runtime/utils/streams/anthropic.ts CHANGED
@@ -12,6 +12,7 @@ import {
    convertIterableToStream,
    createCallbacksTransformer,
    createSSEProtocolTransformer,
+   createTokenSpeedCalculator,
  } from './protocol';
 
  export const transformAnthropicStream = (
@@ -188,9 +189,14 @@ export const transformAnthropicStream = (
      }
    };
 
+ export interface AnthropicStreamOptions {
+   callbacks?: ChatStreamCallbacks;
+   inputStartAt?: number;
+ }
+
  export const AnthropicStream = (
    stream: Stream<Anthropic.MessageStreamEvent> | ReadableStream,
-   callbacks?: ChatStreamCallbacks,
+   { callbacks, inputStartAt }: AnthropicStreamOptions = {},
  ) => {
    const streamStack: StreamContext = { id: '' };
 
@@ -198,6 +204,9 @@ export const AnthropicStream = (
      stream instanceof ReadableStream ? stream : convertIterableToStream(stream);
 
    return readableStream
-     .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack))
+     .pipeThrough(
+       createTokenSpeedCalculator(transformAnthropicStream, { inputStartAt, streamStack }),
+     )
+     .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
      .pipeThrough(createCallbacksTransformer(callbacks));
  };
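Both the Anthropic and OpenAI stream builders now share the same three-stage pipeline: `createTokenSpeedCalculator` wraps the provider-specific transform (so it can watch the resulting protocol chunks for the first `text` chunk and the `usage` chunk), the SSE transformer then serializes the already-transformed chunks via an identity function, and the callbacks transformer runs last. A conceptual sketch with stubbed, simplified signatures:

```ts
// Conceptual sketch; signatures are stubbed with `declare` and simplified to `any`.
declare const raw: ReadableStream;
declare function transformProviderChunk(chunk: any, ctx: any): any;
declare function createTokenSpeedCalculator(
  t: (chunk: any, ctx: any) => any,
  opts: { inputStartAt?: number; streamStack?: any },
): TransformStream;
declare function createSSEProtocolTransformer(t: (chunk: any) => any, ctx: any): TransformStream;
declare function createCallbacksTransformer(cb?: unknown): TransformStream;

const streamStack = { id: '' };
const out = raw
  // stage 1: provider transform + speed measurement in a single pass
  .pipeThrough(
    createTokenSpeedCalculator(transformProviderChunk, { inputStartAt: Date.now(), streamStack }),
  )
  // stage 2: chunks are already protocol-shaped, so the SSE stage receives an identity transform
  .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
  // stage 3: user callbacks observe the serialized stream
  .pipeThrough(createCallbacksTransformer(undefined));
```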
package/src/libs/agent-runtime/utils/streams/openai.ts CHANGED
@@ -16,6 +16,7 @@ import {
    createCallbacksTransformer,
    createFirstErrorHandleTransformer,
    createSSEProtocolTransformer,
+   createTokenSpeedCalculator,
    generateToolCallId,
  } from './protocol';
 
@@ -218,12 +219,13 @@ export interface OpenAIStreamOptions {
      name: string;
    }) => ILobeAgentRuntimeErrorType | undefined;
    callbacks?: ChatStreamCallbacks;
+   inputStartAt?: number;
    provider?: string;
  }
 
  export const OpenAIStream = (
    stream: Stream<OpenAI.ChatCompletionChunk> | ReadableStream,
-   { callbacks, provider, bizErrorTypeTransformer }: OpenAIStreamOptions = {},
+   { callbacks, provider, bizErrorTypeTransformer, inputStartAt }: OpenAIStreamOptions = {},
  ) => {
    const streamStack: StreamContext = { id: '' };
 
@@ -236,7 +238,8 @@ export const OpenAIStream = (
      // provider like huggingface or minimax will return error in the stream,
      // so in the first Transformer, we need to handle the error
      .pipeThrough(createFirstErrorHandleTransformer(bizErrorTypeTransformer, provider))
-     .pipeThrough(createSSEProtocolTransformer(transformOpenAIStream, streamStack))
+     .pipeThrough(createTokenSpeedCalculator(transformOpenAIStream, { inputStartAt, streamStack }))
+     .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
      .pipeThrough(createCallbacksTransformer(callbacks))
  );
  };
package/src/libs/agent-runtime/utils/streams/protocol.test.ts CHANGED
@@ -1,6 +1,6 @@
  import { describe, expect, it } from 'vitest';
 
- import { createSSEDataExtractor } from './protocol';
+ import { createSSEDataExtractor, createTokenSpeedCalculator } from './protocol';
 
  describe('createSSEDataExtractor', () => {
    // Helper function to convert string to Uint8Array
@@ -135,3 +135,69 @@ describe('createSSEDataExtractor', () => {
      });
    });
  });
+
+ describe('createTokenSpeedCalculator', async () => {
+   // Mock the param from caller - 1000 to avoid div 0
+   const inputStartAt = Date.now() - 1000;
+
+   // Helper function to process chunks through transformer
+   const processChunk = async (transformer: TransformStream, chunk: any) => {
+     const results: any[] = [];
+     const readable = new ReadableStream({
+       start(controller) {
+         controller.enqueue(chunk);
+         controller.close();
+       },
+     });
+
+     const writable = new WritableStream({
+       write(chunk) {
+         results.push(chunk);
+       },
+     });
+
+     await readable.pipeThrough(transformer).pipeTo(writable);
+
+     return results;
+   };
+
+   it('should calculate token speed correctly', async () => {
+     const chunks = [
+       { data: '', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'hi', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'stop', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'stop' },
+       {
+         data: {
+           inputTextTokens: 9,
+           outputTextTokens: 1,
+           totalInputTokens: 9,
+           totalOutputTokens: 1,
+           totalTokens: 10,
+         },
+         id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy',
+         type: 'usage',
+       },
+     ];
+
+     const transformer = createTokenSpeedCalculator((v) => v, { inputStartAt });
+     const results = await processChunk(transformer, chunks);
+     expect(results).toHaveLength(chunks.length + 1);
+     const speedChunk = results.slice(-1)[0];
+     expect(speedChunk.id).toBe('output_speed');
+     expect(speedChunk.type).toBe('speed');
+     expect(speedChunk.data.tps).not.toBeNaN();
+     expect(speedChunk.data.ttft).not.toBeNaN();
+   });
+
+   it('should not calculate token speed if no usage', async () => {
+     const chunks = [
+       { data: '', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'hi', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
+       { data: 'stop', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'stop' },
+     ];
+
+     const transformer = createTokenSpeedCalculator((v) => v, { inputStartAt });
+     const results = await processChunk(transformer, chunks);
+     expect(results).toHaveLength(chunks.length);
+   });
+ });
package/src/libs/agent-runtime/utils/streams/protocol.ts CHANGED
@@ -1,4 +1,4 @@
- import { ModelTokensUsage } from '@/types/message';
+ import { ModelSpeed, ModelTokensUsage } from '@/types/message';
  import { safeParseJSON } from '@/utils/safeParseJSON';
 
  import { AgentRuntimeErrorType } from '../../error';
@@ -52,6 +52,8 @@ export interface StreamProtocolChunk {
    | 'error'
    // token usage
    | 'usage'
+   // performance monitor
+   | 'speed'
    // unknown data result
    | 'data';
  }
@@ -287,3 +289,46 @@ export const createSSEDataExtractor = () =>
      }
    },
  });
+
+ export const TOKEN_SPEED_CHUNK_ID = 'output_speed';
+
+ /**
+  * Create a middleware to calculate the token generate speed
+  * @requires createSSEProtocolTransformer
+  */
+ export const createTokenSpeedCalculator = (
+   transformer: (chunk: any, stack: StreamContext) => StreamProtocolChunk | StreamProtocolChunk[],
+   { streamStack, inputStartAt }: { inputStartAt?: number; streamStack?: StreamContext } = {},
+ ) => {
+   let outputStartAt: number | undefined;
+
+   const process = (chunk: StreamProtocolChunk) => {
+     let result = [chunk];
+     // if the chunk is the first text chunk, set as output start
+     if (!outputStartAt && chunk.type === 'text') outputStartAt = Date.now();
+     // if the chunk is the stop chunk, set as output finish
+     if (inputStartAt && outputStartAt && chunk.type === 'usage') {
+       const outputTokens = chunk.data?.totalOutputTokens || chunk.data?.outputTextTokens;
+       result.push({
+         data: {
+           tps: (outputTokens / (Date.now() - outputStartAt)) * 1000,
+           ttft: outputStartAt - inputStartAt,
+         } as ModelSpeed,
+         id: TOKEN_SPEED_CHUNK_ID,
+         type: 'speed',
+       });
+     }
+     return result;
+   };
+
+   return new TransformStream({
+     transform(chunk, controller) {
+       let result = transformer(chunk, streamStack || { id: '' });
+       if (!Array.isArray(result)) result = [result];
+       result.forEach((r) => {
+         const processed = process(r);
+         if (processed) processed.forEach((p) => controller.enqueue(p));
+       });
+     },
+   });
+ };
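The metrics themselves are simple arithmetic over three timestamps: `inputStartAt` (request dispatched), `outputStartAt` (first `text` chunk observed), and the moment the `usage` chunk arrives. A worked example in TypeScript with invented timings:

```ts
// Worked example of the speed math above (all times in ms, values invented).
const inputStartAt = 1_000; // request dispatched
const outputStartAt = 1_400; // first 'text' chunk observed
const usageArrivedAt = 3_400; // 'usage' chunk observed
const outputTokens = 100; // totalOutputTokens from the usage chunk

const ttft = outputStartAt - inputStartAt; // 400 ms to first token
const tps = (outputTokens / (usageArrivedAt - outputStartAt)) * 1000; // 100 / 2000 * 1000 = 50 tokens/s

console.log({ tps, ttft }); // { tps: 50, ttft: 400 }
```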
package/src/locales/default/chat.ts CHANGED
@@ -122,6 +122,17 @@ export default {
    outputText: '文本输出',
    outputTitle: '输出明细',
    reasoning: '深度思考',
+   speed: {
+     tps: {
+       title: 'TPS',
+       tooltip:
+         'Tokens Per Second,TPS。指AI生成内容的平均速度(Token/秒),在接收到首个 Token 后开始计算。',
+     },
+     ttft: {
+       title: 'TTFT',
+       tooltip: 'Time To First Token,TTFT。指从您发送消息到客户端接收到首个 Token 的时间间隔。',
+     },
+   },
    title: '生成明细',
    total: '总计消耗',
  },
package/src/store/chat/slices/aiChat/actions/generateAIChat.ts CHANGED
@@ -576,7 +576,7 @@ export const generateAIChat: StateCreator<
    },
    onFinish: async (
      content,
-     { traceId, observationId, toolCalls, reasoning, grounding, usage },
+     { traceId, observationId, toolCalls, reasoning, grounding, usage, speed },
    ) => {
      // if there is traceId, update it
      if (traceId) {
@@ -611,8 +611,8 @@ export const generateAIChat: StateCreator<
      toolCalls,
      reasoning: !!reasoning ? { ...reasoning, duration } : undefined,
      search: !!grounding?.citations ? grounding : undefined,
-     metadata: usage,
      imageList: finalImages.length > 0 ? finalImages : undefined,
+     metadata: speed ? { ...usage, ...speed } : usage,
    });
  },
  onMessageHandle: async (chunk) => {
package/src/types/message/base.ts CHANGED
@@ -44,8 +44,16 @@ export interface ModelTokensUsage {
    totalTokens?: number;
  }
 
+ export interface ModelSpeed {
+   // tokens per second
+   tps?: number;
+   // time to first token
+   ttft?: number;
+ }
+
  export interface MessageMetadata extends ModelTokensUsage {
    tps?: number;
+   ttft?: number;
  }
 
  export type MessageRoleType = 'user' | 'system' | 'assistant' | 'tool';
package/src/utils/fetch/fetchSSE.ts CHANGED
@@ -11,6 +11,7 @@ import {
    MessageToolCallChunk,
    MessageToolCallSchema,
    ModelReasoning,
+   ModelSpeed,
    ModelTokensUsage,
  } from '@/types/message';
  import { ChatImageChunk } from '@/types/message/image';
@@ -29,6 +30,7 @@ export type OnFinishHandler = (
    images?: ChatImageChunk[];
    observationId?: string | null;
    reasoning?: ModelReasoning;
+   speed?: ModelSpeed;
    toolCalls?: MessageToolCall[];
    traceId?: string | null;
    type?: SSEFinishType;
@@ -41,6 +43,11 @@ export interface MessageUsageChunk {
    usage: ModelTokensUsage;
  }
 
+ export interface MessageSpeedChunk {
+   speed: ModelSpeed;
+   type: 'speed';
+ }
+
  export interface MessageTextChunk {
    text: string;
    type: 'text';
@@ -82,7 +89,8 @@ export interface FetchSSEOptions {
      | MessageReasoningChunk
      | MessageGroundingChunk
      | MessageUsageChunk
-     | MessageBase64ImageChunk,
+     | MessageBase64ImageChunk
+     | MessageSpeedChunk,
    ) => void;
    smoothing?: SmoothingParams | boolean;
  }
@@ -342,6 +350,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
    let grounding: GroundingSearch | undefined = undefined;
    let usage: ModelTokensUsage | undefined = undefined;
    let images: ChatImageChunk[] = [];
+   let speed: ModelSpeed | undefined = undefined;
 
    await fetchEventSource(url, {
      body: options.body,
@@ -433,6 +442,12 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
          break;
        }
 
+       case 'speed': {
+         speed = data;
+         options.onMessageHandle?.({ speed: data, type: 'speed' });
+         break;
+       }
+
        case 'grounding': {
          grounding = data;
          options.onMessageHandle?.({ grounding: data, type: 'grounding' });
@@ -517,6 +532,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
      images: images.length > 0 ? images : undefined,
      observationId,
      reasoning: !!thinking ? { content: thinking, signature: thinkingSignature } : undefined,
+     speed,
      toolCalls,
      traceId,
      type: finishedType,
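End to end, a `speed` chunk emitted by the runtime reaches consumers twice: incrementally via `onMessageHandle`, and at completion via `onFinish`, where the store merges it into the persisted metadata (`metadata: speed ? { ...usage, ...speed } : usage`, as in `generateAIChat` above). A hypothetical consumer sketch, assuming the option shapes in this diff; the URL and handler bodies are illustrative:

```ts
// Hypothetical fetchSSE consumer; '/api/chat' and the handlers are illustrative.
await fetchSSE('/api/chat', {
  method: 'POST',
  onMessageHandle: (chunk) => {
    if (chunk.type === 'speed') {
      console.log('tps:', chunk.speed.tps, 'ttft(ms):', chunk.speed.ttft);
    }
  },
  onFinish: async (content, { usage, speed }) => {
    // mirror the store logic: fold speed into the persisted metadata
    const metadata = speed ? { ...usage, ...speed } : usage;
    console.log(content, metadata);
  },
});
```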