@lobehub/chat 1.81.6 → 1.81.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/changelog/v1.json +18 -0
- package/locales/ar/chat.json +10 -0
- package/locales/bg-BG/chat.json +10 -0
- package/locales/de-DE/chat.json +10 -0
- package/locales/en-US/chat.json +10 -0
- package/locales/es-ES/chat.json +10 -0
- package/locales/fa-IR/chat.json +10 -0
- package/locales/fr-FR/chat.json +10 -0
- package/locales/it-IT/chat.json +10 -0
- package/locales/ja-JP/chat.json +10 -0
- package/locales/ko-KR/chat.json +10 -0
- package/locales/nl-NL/chat.json +10 -0
- package/locales/pl-PL/chat.json +10 -0
- package/locales/pt-BR/chat.json +10 -0
- package/locales/ru-RU/chat.json +10 -0
- package/locales/tr-TR/chat.json +10 -0
- package/locales/vi-VN/chat.json +10 -0
- package/locales/zh-CN/chat.json +10 -0
- package/locales/zh-TW/chat.json +10 -0
- package/package.json +1 -1
- package/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx +31 -4
- package/src/features/Conversation/Extras/Usage/index.tsx +1 -1
- package/src/libs/agent-runtime/anthropic/index.ts +7 -3
- package/src/libs/agent-runtime/perplexity/index.test.ts +4 -1
- package/src/libs/agent-runtime/togetherai/index.ts +19 -23
- package/src/libs/agent-runtime/togetherai/type.ts +2 -2
- package/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +17 -8
- package/src/libs/agent-runtime/utils/streams/anthropic.test.ts +11 -5
- package/src/libs/agent-runtime/utils/streams/anthropic.ts +11 -2
- package/src/libs/agent-runtime/utils/streams/openai.ts +5 -2
- package/src/libs/agent-runtime/utils/streams/protocol.test.ts +67 -1
- package/src/libs/agent-runtime/utils/streams/protocol.ts +46 -1
- package/src/locales/default/chat.ts +11 -0
- package/src/store/chat/slices/aiChat/actions/generateAIChat.ts +2 -2
- package/src/types/message/base.ts +8 -0
- package/src/utils/fetch/fetchSSE.ts +17 -1
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,56 @@
|
|
2
2
|
|
3
3
|
# Changelog
|
4
4
|
|
5
|
+
### [Version 1.81.8](https://github.com/lobehub/lobe-chat/compare/v1.81.7...v1.81.8)
|
6
|
+
|
7
|
+
<sup>Released on **2025-04-21**</sup>
|
8
|
+
|
9
|
+
#### 💄 Styles
|
10
|
+
|
11
|
+
- **misc**: Show token generate performance.
|
12
|
+
|
13
|
+
<br/>
|
14
|
+
|
15
|
+
<details>
|
16
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
17
|
+
|
18
|
+
#### Styles
|
19
|
+
|
20
|
+
- **misc**: Show token generate performance, closes [#6959](https://github.com/lobehub/lobe-chat/issues/6959) ([33c3fe7](https://github.com/lobehub/lobe-chat/commit/33c3fe7))
|
21
|
+
|
22
|
+
</details>
|
23
|
+
|
24
|
+
<div align="right">
|
25
|
+
|
26
|
+
[Back to top](#readme-top)
|
27
|
+
|
28
|
+
</div>
|
29
|
+
|
30
|
+
### [Version 1.81.7](https://github.com/lobehub/lobe-chat/compare/v1.81.6...v1.81.7)
|
31
|
+
|
32
|
+
<sup>Released on **2025-04-21**</sup>
|
33
|
+
|
34
|
+
#### 🐛 Bug Fixes
|
35
|
+
|
36
|
+
- **misc**: Together.ai fetch model list.
|
37
|
+
|
38
|
+
<br/>
|
39
|
+
|
40
|
+
<details>
|
41
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
42
|
+
|
43
|
+
#### What's fixed
|
44
|
+
|
45
|
+
- **misc**: Together.ai fetch model list, closes [#7498](https://github.com/lobehub/lobe-chat/issues/7498) ([bd797b8](https://github.com/lobehub/lobe-chat/commit/bd797b8))
|
46
|
+
|
47
|
+
</details>
|
48
|
+
|
49
|
+
<div align="right">
|
50
|
+
|
51
|
+
[Back to top](#readme-top)
|
52
|
+
|
53
|
+
</div>
|
54
|
+
|
5
55
|
### [Version 1.81.6](https://github.com/lobehub/lobe-chat/compare/v1.81.5...v1.81.6)
|
6
56
|
|
7
57
|
<sup>Released on **2025-04-21**</sup>
|
package/changelog/v1.json
CHANGED
@@ -1,4 +1,22 @@
|
|
1
1
|
[
|
2
|
+
{
|
3
|
+
"children": {
|
4
|
+
"improvements": [
|
5
|
+
"Show token generate performance."
|
6
|
+
]
|
7
|
+
},
|
8
|
+
"date": "2025-04-21",
|
9
|
+
"version": "1.81.8"
|
10
|
+
},
|
11
|
+
{
|
12
|
+
"children": {
|
13
|
+
"fixes": [
|
14
|
+
"Together.ai fetch model list."
|
15
|
+
]
|
16
|
+
},
|
17
|
+
"date": "2025-04-21",
|
18
|
+
"version": "1.81.7"
|
19
|
+
},
|
2
20
|
{
|
3
21
|
"children": {
|
4
22
|
"improvements": [
|
package/locales/ar/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "مخرجات نصية",
|
120
120
|
"outputTitle": "تفاصيل المخرجات",
|
121
121
|
"reasoning": "تفكير عميق",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "عدد الرموز في الثانية، TPS. يشير إلى متوسط سرعة توليد المحتوى بواسطة الذكاء الاصطناعي (رمز/ثانية)، ويبدأ الحساب عند استلام أول رمز."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "الوقت حتى أول رمز، TTFT. يشير إلى الفارق الزمني من لحظة إرسال الرسالة حتى استلام أول رمز في العميل."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "تفاصيل التوليد",
|
123
133
|
"total": "الإجمالي المستهلك"
|
124
134
|
}
|
package/locales/bg-BG/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Текстов изход",
|
120
120
|
"outputTitle": "Детайли за изхода",
|
121
121
|
"reasoning": "Дълбочинно разсъждение",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Токени на секунда, TPS. Отнася се до средната скорост на генериране на съдържание от AI (Токен/секунда), започвайки да се изчислява след получаване на първия токен."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Време до първия токен, TTFT. Отнася се до времевия интервал от изпращането на съобщението до получаването на първия токен от клиента."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Детайли за генериране",
|
123
133
|
"total": "Общо разходи"
|
124
134
|
}
|
package/locales/de-DE/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Text-Ausgabe",
|
120
120
|
"outputTitle": "Ausgabedetails",
|
121
121
|
"reasoning": "Tiefes Denken",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens pro Sekunde, TPS. Bezieht sich auf die durchschnittliche Geschwindigkeit, mit der AI-Inhalte generiert werden (Token/Sekunde), und beginnt mit der Berechnung, nachdem das erste Token empfangen wurde."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Zeit bis zum ersten Token, TTFT. Bezieht sich auf das Zeitintervall von dem Moment, in dem Sie eine Nachricht senden, bis der Client das erste Token erhält."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Generierungsdetails",
|
123
133
|
"total": "Gesamter Verbrauch"
|
124
134
|
}
|
package/locales/en-US/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Text Output",
|
120
120
|
"outputTitle": "Output Details",
|
121
121
|
"reasoning": "Deep Thinking",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Per Second (TPS). This indicates the average speed of AI-generated content (Tokens/second), calculated from the moment the first Token is received."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Time To First Token (TTFT). This refers to the time interval from when you send a message to when the client receives the first Token."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Generation Details",
|
123
133
|
"total": "Total Consumption"
|
124
134
|
}
|
package/locales/es-ES/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Salida de texto",
|
120
120
|
"outputTitle": "Detalles de salida",
|
121
121
|
"reasoning": "Razonamiento profundo",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Por Segundo, TPS. Se refiere a la velocidad promedio de generación de contenido por la IA (Token/segundo), comenzando a contar desde que se recibe el primer Token."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Tiempo Hasta el Primer Token, TTFT. Se refiere al intervalo de tiempo desde que envía un mensaje hasta que el cliente recibe el primer Token."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Detalles de generación",
|
123
133
|
"total": "Total consumido"
|
124
134
|
}
|
package/locales/fa-IR/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "خروجی متنی",
|
120
120
|
"outputTitle": "جزئیات خروجی",
|
121
121
|
"reasoning": "تفکر عمیق",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "تعداد توکن در ثانیه، TPS. به میانگین سرعت تولید محتوای AI اشاره دارد (توکن/ثانیه) و از زمان دریافت اولین توکن شروع به محاسبه میشود."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "زمان تا اولین توکن، TTFT. به فاصله زمانی بین ارسال پیام شما و دریافت اولین توکن توسط کلاینت اشاره دارد."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "جزئیات تولید",
|
123
133
|
"total": "مجموع مصرف"
|
124
134
|
}
|
package/locales/fr-FR/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Sortie texte",
|
120
120
|
"outputTitle": "Détails de la sortie",
|
121
121
|
"reasoning": "Raisonnement approfondi",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Par Seconde, TPS. Indique la vitesse moyenne de génération de contenu par l'IA (Token/seconde), calculée à partir de la réception du premier Token."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Temps Jusqu'au Premier Token, TTFT. Indique l'intervalle de temps entre l'envoi de votre message et la réception du premier Token par le client."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Détails de génération",
|
123
133
|
"total": "Total consommé"
|
124
134
|
}
|
package/locales/it-IT/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Output testo",
|
120
120
|
"outputTitle": "Dettagli output",
|
121
121
|
"reasoning": "Ragionamento profondo",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Token Per Second, TPS. Indica la velocità media di generazione dei contenuti da parte dell'AI (Token/secondo), calcolata a partire dalla ricezione del primo Token."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Time To First Token, TTFT. Indica l'intervallo di tempo che intercorre tra l'invio del messaggio e la ricezione del primo Token da parte del client."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Dettagli generati",
|
123
133
|
"total": "Totale consumato"
|
124
134
|
}
|
package/locales/ja-JP/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "テキスト出力",
|
120
120
|
"outputTitle": "出力の詳細",
|
121
121
|
"reasoning": "深い思考",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "トークン毎秒(TPS)。AIが生成するコンテンツの平均速度(トークン/秒)を示し、最初のトークンを受信した時点から計算が始まります。"
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "最初のトークンまでの時間(TTFT)。メッセージを送信してからクライアントが最初のトークンを受信するまでの時間間隔を示します。"
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "生成の詳細",
|
123
133
|
"total": "合計消費"
|
124
134
|
}
|
package/locales/ko-KR/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "텍스트 출력",
|
120
120
|
"outputTitle": "출력 세부사항",
|
121
121
|
"reasoning": "심층 사고",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "초당 토큰 수(TPS). AI가 생성한 콘텐츠의 평균 속도(토큰/초)를 나타내며, 첫 번째 토큰을 수신한 후부터 계산됩니다."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "첫 번째 토큰까지의 시간(Time To First Token, TTFT). 메시지를 전송한 시점부터 클라이언트가 첫 번째 토큰을 수신할 때까지의 시간 간격을 나타냅니다."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "생성 세부사항",
|
123
133
|
"total": "총 소모"
|
124
134
|
}
|
package/locales/nl-NL/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Tekstuitvoer",
|
120
120
|
"outputTitle": "Uitvoerdetails",
|
121
121
|
"reasoning": "Diep nadenken",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Per Second, TPS. Dit verwijst naar de gemiddelde snelheid van AI-gegenereerde inhoud (Token/seconde), die begint te tellen vanaf het moment dat het eerste Token is ontvangen."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Time To First Token, TTFT. Dit verwijst naar de tijdsduur van het moment dat u een bericht verzendt tot het moment dat de client het eerste Token ontvangt."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Genereren van details",
|
123
133
|
"total": "Totaal verbruik"
|
124
134
|
}
|
package/locales/pl-PL/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Wyjście tekstowe",
|
120
120
|
"outputTitle": "Szczegóły wyjścia",
|
121
121
|
"reasoning": "Głębokie myślenie",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokeny na sekundę, TPS. Oznacza średnią prędkość generowania treści przez AI (Tokeny/sekundę), obliczaną od momentu otrzymania pierwszego Tokena."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Czas do pierwszego tokena, TTFT. Oznacza czas od momentu wysłania wiadomości do momentu, w którym klient otrzymuje pierwszy Token."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Szczegóły generacji",
|
123
133
|
"total": "Całkowite zużycie"
|
124
134
|
}
|
package/locales/pt-BR/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Saída de texto",
|
120
120
|
"outputTitle": "Detalhes da saída",
|
121
121
|
"reasoning": "Raciocínio profundo",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Por Segundo, TPS. Refere-se à velocidade média de geração de conteúdo pela IA (Token/segundo), começando a contagem após o recebimento do primeiro Token."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Tempo Para o Primeiro Token, TTFT. Refere-se ao intervalo de tempo desde que você envia a mensagem até que o cliente receba o primeiro Token."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Detalhes da geração",
|
123
133
|
"total": "Total consumido"
|
124
134
|
}
|
package/locales/ru-RU/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Текстовый выход",
|
120
120
|
"outputTitle": "Детали выхода",
|
121
121
|
"reasoning": "Глубокое мышление",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Токены в секунду (TPS). Средняя скорость генерации контента ИИ (Токенов/сек), начинается с момента получения первого токена."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Время до первого токена (TTFT). Время, прошедшее с момента отправки вами сообщения до получения первого токена клиентом."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Детали генерации",
|
123
133
|
"total": "Общее потребление"
|
124
134
|
}
|
package/locales/tr-TR/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Metin çıkışı",
|
120
120
|
"outputTitle": "Çıkış detayları",
|
121
121
|
"reasoning": "Derin düşünme",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Saniye Başına Token, TPS. AI tarafından üretilen içeriğin ortalama hızını (Token/saniye) belirtir, ilk Token alındıktan sonra hesaplanmaya başlanır."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "İlk Token'a Kadar Geçen Süre, TTFT. Mesajınızı göndermenizden, istemcinin ilk Token'ı almasına kadar geçen zaman aralığını belirtir."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Üretim detayları",
|
123
133
|
"total": "Toplam tüketim"
|
124
134
|
}
|
package/locales/vi-VN/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "Văn bản xuất",
|
120
120
|
"outputTitle": "Chi tiết xuất",
|
121
121
|
"reasoning": "Suy nghĩ sâu sắc",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Mỗi Giây, TPS. Chỉ tốc độ trung bình của nội dung do AI tạo ra (Token/giây), bắt đầu tính từ khi nhận được Token đầu tiên."
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Thời Gian Đến Token Đầu Tiên, TTFT. Chỉ khoảng thời gian từ khi bạn gửi tin nhắn đến khi máy khách nhận được Token đầu tiên."
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "Chi tiết tạo ra",
|
123
133
|
"total": "Tổng tiêu thụ"
|
124
134
|
}
|
package/locales/zh-CN/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "文本输出",
|
120
120
|
"outputTitle": "输出明细",
|
121
121
|
"reasoning": "深度思考",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "Tokens Per Second,TPS。指AI生成内容的平均速度(Token/秒),在接收到首个 Token 后开始计算。"
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "Time To First Token,TTFT。指从您发送消息到客户端接收到首个 Token 的时间间隔。"
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "生成明细",
|
123
133
|
"total": "总计消耗"
|
124
134
|
}
|
package/locales/zh-TW/chat.json
CHANGED
@@ -119,6 +119,16 @@
|
|
119
119
|
"outputText": "文本輸出",
|
120
120
|
"outputTitle": "輸出明細",
|
121
121
|
"reasoning": "深度思考",
|
122
|
+
"speed": {
|
123
|
+
"tps": {
|
124
|
+
"title": "TPS",
|
125
|
+
"tooltip": "每秒令牌數,TPS。指AI生成內容的平均速度(Token/秒),在接收到首個令牌後開始計算。"
|
126
|
+
},
|
127
|
+
"ttft": {
|
128
|
+
"title": "TTFT",
|
129
|
+
"tooltip": "首次令牌時間,TTFT。指從您發送消息到客戶端接收到首個令牌的時間間隔。"
|
130
|
+
}
|
131
|
+
},
|
122
132
|
"title": "生成明細",
|
123
133
|
"total": "總計消耗"
|
124
134
|
}
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@lobehub/chat",
|
3
|
-
"version": "1.81.6",
|
3
|
+
"version": "1.81.8",
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
5
5
|
"keywords": [
|
6
6
|
"framework",
|
@@ -6,10 +6,11 @@ import { memo } from 'react';
|
|
6
6
|
import { useTranslation } from 'react-i18next';
|
7
7
|
import { Center, Flexbox } from 'react-layout-kit';
|
8
8
|
|
9
|
+
import InfoTooltip from '@/components/InfoTooltip';
|
9
10
|
import { aiModelSelectors, useAiInfraStore } from '@/store/aiInfra';
|
10
11
|
import { useGlobalStore } from '@/store/global';
|
11
12
|
import { systemStatusSelectors } from '@/store/global/selectors';
|
12
|
-
import {
|
13
|
+
import { MessageMetadata } from '@/types/message';
|
13
14
|
import { formatNumber } from '@/utils/format';
|
14
15
|
|
15
16
|
import ModelCard from './ModelCard';
|
@@ -17,19 +18,19 @@ import TokenProgress, { TokenProgressItem } from './TokenProgress';
|
|
17
18
|
import { getDetailsToken } from './tokens';
|
18
19
|
|
19
20
|
interface TokenDetailProps {
|
21
|
+
meta: MessageMetadata;
|
20
22
|
model: string;
|
21
23
|
provider: string;
|
22
|
-
usage: ModelTokensUsage;
|
23
24
|
}
|
24
25
|
|
25
|
-
const TokenDetail = memo<TokenDetailProps>(({
|
26
|
+
const TokenDetail = memo<TokenDetailProps>(({ meta, model, provider }) => {
|
26
27
|
const { t } = useTranslation('chat');
|
27
28
|
const theme = useTheme();
|
28
29
|
|
29
30
|
const modelCard = useAiInfraStore(aiModelSelectors.getModelCard(model, provider));
|
30
31
|
const isShowCredit = useGlobalStore(systemStatusSelectors.isShowCredit) && !!modelCard?.pricing;
|
31
32
|
|
32
|
-
const detailTokens = getDetailsToken(
|
33
|
+
const detailTokens = getDetailsToken(meta, modelCard);
|
33
34
|
const inputDetails = [
|
34
35
|
!!detailTokens.inputAudio && {
|
35
36
|
color: theme.cyan9,
|
@@ -113,6 +114,10 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
|
|
113
114
|
detailTokens.totalTokens!.credit / detailTokens.totalTokens!.token,
|
114
115
|
2,
|
115
116
|
);
|
117
|
+
|
118
|
+
const tps = meta?.tps ? formatNumber(meta.tps, 2) : undefined;
|
119
|
+
const ttft = meta?.ttft ? formatNumber(meta.ttft / 1000, 2) : undefined;
|
120
|
+
|
116
121
|
return (
|
117
122
|
<Popover
|
118
123
|
arrow={false}
|
@@ -170,6 +175,28 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
|
|
170
175
|
<div style={{ fontWeight: 500 }}>{averagePricing}</div>
|
171
176
|
</Flexbox>
|
172
177
|
)}
|
178
|
+
{tps && (
|
179
|
+
<Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
|
180
|
+
<Flexbox gap={8} horizontal>
|
181
|
+
<div style={{ color: theme.colorTextSecondary }}>
|
182
|
+
{t('messages.tokenDetails.speed.tps.title')}
|
183
|
+
</div>
|
184
|
+
<InfoTooltip title={t('messages.tokenDetails.speed.tps.tooltip')} />
|
185
|
+
</Flexbox>
|
186
|
+
<div style={{ fontWeight: 500 }}>{tps}</div>
|
187
|
+
</Flexbox>
|
188
|
+
)}
|
189
|
+
{ttft && (
|
190
|
+
<Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
|
191
|
+
<Flexbox gap={8} horizontal>
|
192
|
+
<div style={{ color: theme.colorTextSecondary }}>
|
193
|
+
{t('messages.tokenDetails.speed.ttft.title')}
|
194
|
+
</div>
|
195
|
+
<InfoTooltip title={t('messages.tokenDetails.speed.ttft.tooltip')} />
|
196
|
+
</Flexbox>
|
197
|
+
<div style={{ fontWeight: 500 }}>{ttft}s</div>
|
198
|
+
</Flexbox>
|
199
|
+
)}
|
173
200
|
</Flexbox>
|
174
201
|
</Flexbox>
|
175
202
|
</Flexbox>
|
@@ -31,7 +31,7 @@ const Usage = memo<UsageProps>(({ model, metadata, provider }) => {
|
|
31
31
|
</Center>
|
32
32
|
|
33
33
|
{!!metadata.totalTokens && (
|
34
|
-
<TokenDetail model={model as string} provider={provider}
|
34
|
+
<TokenDetail meta={metadata} model={model as string} provider={provider} />
|
35
35
|
)}
|
36
36
|
</Flexbox>
|
37
37
|
);
|
@@ -56,6 +56,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
|
|
56
56
|
async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
|
57
57
|
try {
|
58
58
|
const anthropicPayload = await this.buildAnthropicPayload(payload);
|
59
|
+
const inputStartAt = Date.now();
|
59
60
|
|
60
61
|
if (this.isDebug()) {
|
61
62
|
console.log('[requestPayload]');
|
@@ -79,9 +80,12 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
|
|
79
80
|
debugStream(debug.toReadableStream()).catch(console.error);
|
80
81
|
}
|
81
82
|
|
82
|
-
return StreamingResponse(
|
83
|
-
|
84
|
-
|
83
|
+
return StreamingResponse(
|
84
|
+
AnthropicStream(prod, { callbacks: options?.callback, inputStartAt }),
|
85
|
+
{
|
86
|
+
headers: options?.headers,
|
87
|
+
},
|
88
|
+
);
|
85
89
|
} catch (error) {
|
86
90
|
throw this.handleError(error);
|
87
91
|
}
|
@@ -225,7 +225,10 @@ describe('LobePerplexityAI', () => {
|
|
225
225
|
stream.push(decoder.decode(value));
|
226
226
|
}
|
227
227
|
|
228
|
-
|
228
|
+
// Slice out speed chunk
|
229
|
+
const noSpeedStream = stream.slice(0, -3);
|
230
|
+
|
231
|
+
expect(noSpeedStream).toEqual(
|
229
232
|
[
|
230
233
|
'id: 506d64fb-e7f2-4d94-b80f-158369e9446d',
|
231
234
|
'event: text',
|
@@ -1,9 +1,9 @@
|
|
1
|
+
import type { ChatModelCard } from '@/types/llm';
|
2
|
+
|
1
3
|
import { ModelProvider } from '../types';
|
2
4
|
import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
|
3
5
|
import { TogetherAIModel } from './type';
|
4
6
|
|
5
|
-
import type { ChatModelCard } from '@/types/llm';
|
6
|
-
|
7
7
|
export const LobeTogetherAI = LobeOpenAICompatibleFactory({
|
8
8
|
baseURL: 'https://api.together.xyz/v1',
|
9
9
|
constructorOptions: {
|
@@ -18,24 +18,20 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
|
|
18
18
|
models: async ({ client }) => {
|
19
19
|
const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');
|
20
20
|
|
21
|
-
const visionKeywords = [
|
22
|
-
'qvq',
|
23
|
-
'vision',
|
24
|
-
];
|
21
|
+
const visionKeywords = ['qvq', 'vision'];
|
25
22
|
|
26
|
-
const reasoningKeywords = [
|
27
|
-
'deepseek-r1',
|
28
|
-
'qwq',
|
29
|
-
];
|
23
|
+
const reasoningKeywords = ['deepseek-r1', 'qwq'];
|
30
24
|
|
31
25
|
client.baseURL = 'https://api.together.xyz/api';
|
32
26
|
|
33
|
-
const modelsPage = await client.models.list() as any;
|
27
|
+
const modelsPage = (await client.models.list()) as any;
|
34
28
|
const modelList: TogetherAIModel[] = modelsPage.body;
|
35
29
|
|
36
30
|
return modelList
|
37
31
|
.map((model) => {
|
38
|
-
const knownModel = LOBE_DEFAULT_MODEL_LIST.find(
|
32
|
+
const knownModel = LOBE_DEFAULT_MODEL_LIST.find(
|
33
|
+
(m) => model.id.toLowerCase() === m.id.toLowerCase(),
|
34
|
+
);
|
39
35
|
|
40
36
|
return {
|
41
37
|
contextWindowTokens: knownModel?.contextWindowTokens ?? undefined,
|
@@ -43,21 +39,21 @@ export const LobeTogetherAI = LobeOpenAICompatibleFactory({
|
|
43
39
|
displayName: model.display_name,
|
44
40
|
enabled: knownModel?.enabled || false,
|
45
41
|
functionCall:
|
46
|
-
model.description?.toLowerCase().includes('function calling')
|
47
|
-
|
48
|
-
|
49
|
-
id: model.
|
42
|
+
model.description?.toLowerCase().includes('function calling') ||
|
43
|
+
knownModel?.abilities?.functionCall ||
|
44
|
+
false,
|
45
|
+
id: model.id,
|
50
46
|
maxOutput: model.context_length,
|
51
47
|
reasoning:
|
52
|
-
reasoningKeywords.some(keyword => model.
|
53
|
-
|
54
|
-
|
48
|
+
reasoningKeywords.some((keyword) => model.id.toLowerCase().includes(keyword)) ||
|
49
|
+
knownModel?.abilities?.functionCall ||
|
50
|
+
false,
|
55
51
|
tokens: model.context_length,
|
56
52
|
vision:
|
57
|
-
model.description?.toLowerCase().includes('vision')
|
58
|
-
|
59
|
-
|
60
|
-
|
53
|
+
model.description?.toLowerCase().includes('vision') ||
|
54
|
+
visionKeywords.some((keyword) => model.id?.toLowerCase().includes(keyword)) ||
|
55
|
+
knownModel?.abilities?.functionCall ||
|
56
|
+
false,
|
61
57
|
};
|
62
58
|
})
|
63
59
|
.filter(Boolean) as ChatModelCard[];
|
@@ -50,7 +50,8 @@ interface Depth {
|
|
50
50
|
}
|
51
51
|
|
52
52
|
export interface TogetherAIModel {
|
53
|
-
|
53
|
+
id: string;
|
54
|
+
// eslint-disable-next-line typescript-sort-keys/interface
|
54
55
|
access: string;
|
55
56
|
config: Config;
|
56
57
|
context_length: number;
|
@@ -67,7 +68,6 @@ export interface TogetherAIModel {
|
|
67
68
|
license: string;
|
68
69
|
link: string;
|
69
70
|
modelInstanceConfig: ModelInstanceConfig;
|
70
|
-
name: string;
|
71
71
|
num_parameters: number;
|
72
72
|
pricing: Pricing;
|
73
73
|
show_in_playground: boolean;
|
@@ -201,6 +201,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
|
|
201
201
|
|
202
202
|
async chat({ responseMode, ...payload }: ChatStreamPayload, options?: ChatCompetitionOptions) {
|
203
203
|
try {
|
204
|
+
const inputStartAt = Date.now();
|
204
205
|
const postPayload = chatCompletion?.handlePayload
|
205
206
|
? chatCompletion.handlePayload(payload, this._options)
|
206
207
|
: ({
|
@@ -253,10 +254,14 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
|
|
253
254
|
debugStream(useForDebugStream).catch(console.error);
|
254
255
|
}
|
255
256
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
257
|
+
return StreamingResponse(
|
258
|
+
chatCompletion?.handleStream
|
259
|
+
? chatCompletion.handleStream(prod, streamOptions.callbacks)
|
260
|
+
: OpenAIStream(prod, { ...streamOptions, inputStartAt }),
|
261
|
+
{
|
262
|
+
headers: options?.headers,
|
263
|
+
},
|
264
|
+
);
|
260
265
|
}
|
261
266
|
|
262
267
|
if (debug?.chatCompletion?.()) {
|
@@ -269,10 +274,14 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
|
|
269
274
|
chatCompletion?.handleTransformResponseToStream || transformResponseToStream;
|
270
275
|
const stream = transformHandler(response as unknown as OpenAI.ChatCompletion);
|
271
276
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
277
|
+
return StreamingResponse(
|
278
|
+
chatCompletion?.handleStream
|
279
|
+
? chatCompletion.handleStream(stream, streamOptions.callbacks)
|
280
|
+
: OpenAIStream(stream, { ...streamOptions, inputStartAt }),
|
281
|
+
{
|
282
|
+
headers: options?.headers,
|
283
|
+
},
|
284
|
+
);
|
276
285
|
} catch (error) {
|
277
286
|
throw this.handleError(error);
|
278
287
|
}
|
@@ -61,9 +61,11 @@ describe('AnthropicStream', () => {
|
|
61
61
|
const onCompletionMock = vi.fn();
|
62
62
|
|
63
63
|
const protocolStream = AnthropicStream(mockAnthropicStream, {
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
callbacks: {
|
65
|
+
onStart: onStartMock,
|
66
|
+
onText: onTextMock,
|
67
|
+
onCompletion: onCompletionMock,
|
68
|
+
},
|
67
69
|
});
|
68
70
|
|
69
71
|
const decoder = new TextDecoder();
|
@@ -165,7 +167,9 @@ describe('AnthropicStream', () => {
|
|
165
167
|
const onToolCallMock = vi.fn();
|
166
168
|
|
167
169
|
const protocolStream = AnthropicStream(mockReadableStream, {
|
168
|
-
|
170
|
+
callbacks: {
|
171
|
+
onToolsCalling: onToolCallMock,
|
172
|
+
},
|
169
173
|
});
|
170
174
|
|
171
175
|
const decoder = new TextDecoder();
|
@@ -317,7 +321,9 @@ describe('AnthropicStream', () => {
|
|
317
321
|
const onToolCallMock = vi.fn();
|
318
322
|
|
319
323
|
const protocolStream = AnthropicStream(mockReadableStream, {
|
320
|
-
|
324
|
+
callbacks: {
|
325
|
+
onToolsCalling: onToolCallMock,
|
326
|
+
},
|
321
327
|
});
|
322
328
|
|
323
329
|
const decoder = new TextDecoder();
|
@@ -12,6 +12,7 @@ import {
|
|
12
12
|
convertIterableToStream,
|
13
13
|
createCallbacksTransformer,
|
14
14
|
createSSEProtocolTransformer,
|
15
|
+
createTokenSpeedCalculator,
|
15
16
|
} from './protocol';
|
16
17
|
|
17
18
|
export const transformAnthropicStream = (
|
@@ -188,9 +189,14 @@ export const transformAnthropicStream = (
|
|
188
189
|
}
|
189
190
|
};
|
190
191
|
|
192
|
+
export interface AnthropicStreamOptions {
|
193
|
+
callbacks?: ChatStreamCallbacks;
|
194
|
+
inputStartAt?: number;
|
195
|
+
}
|
196
|
+
|
191
197
|
export const AnthropicStream = (
|
192
198
|
stream: Stream<Anthropic.MessageStreamEvent> | ReadableStream,
|
193
|
-
callbacks
|
199
|
+
{ callbacks, inputStartAt }: AnthropicStreamOptions = {},
|
194
200
|
) => {
|
195
201
|
const streamStack: StreamContext = { id: '' };
|
196
202
|
|
@@ -198,6 +204,9 @@ export const AnthropicStream = (
|
|
198
204
|
stream instanceof ReadableStream ? stream : convertIterableToStream(stream);
|
199
205
|
|
200
206
|
return readableStream
|
201
|
-
.pipeThrough(
|
207
|
+
.pipeThrough(
|
208
|
+
createTokenSpeedCalculator(transformAnthropicStream, { inputStartAt, streamStack }),
|
209
|
+
)
|
210
|
+
.pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
|
202
211
|
.pipeThrough(createCallbacksTransformer(callbacks));
|
203
212
|
};
|
@@ -16,6 +16,7 @@ import {
|
|
16
16
|
createCallbacksTransformer,
|
17
17
|
createFirstErrorHandleTransformer,
|
18
18
|
createSSEProtocolTransformer,
|
19
|
+
createTokenSpeedCalculator,
|
19
20
|
generateToolCallId,
|
20
21
|
} from './protocol';
|
21
22
|
|
@@ -218,12 +219,13 @@ export interface OpenAIStreamOptions {
|
|
218
219
|
name: string;
|
219
220
|
}) => ILobeAgentRuntimeErrorType | undefined;
|
220
221
|
callbacks?: ChatStreamCallbacks;
|
222
|
+
inputStartAt?: number;
|
221
223
|
provider?: string;
|
222
224
|
}
|
223
225
|
|
224
226
|
export const OpenAIStream = (
|
225
227
|
stream: Stream<OpenAI.ChatCompletionChunk> | ReadableStream,
|
226
|
-
{ callbacks, provider, bizErrorTypeTransformer }: OpenAIStreamOptions = {},
|
228
|
+
{ callbacks, provider, bizErrorTypeTransformer, inputStartAt }: OpenAIStreamOptions = {},
|
227
229
|
) => {
|
228
230
|
const streamStack: StreamContext = { id: '' };
|
229
231
|
|
@@ -236,7 +238,8 @@ export const OpenAIStream = (
|
|
236
238
|
// provider like huggingface or minimax will return error in the stream,
|
237
239
|
// so in the first Transformer, we need to handle the error
|
238
240
|
.pipeThrough(createFirstErrorHandleTransformer(bizErrorTypeTransformer, provider))
|
239
|
-
.pipeThrough(
|
241
|
+
.pipeThrough(createTokenSpeedCalculator(transformOpenAIStream, { inputStartAt, streamStack }))
|
242
|
+
.pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
|
240
243
|
.pipeThrough(createCallbacksTransformer(callbacks))
|
241
244
|
);
|
242
245
|
};
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { describe, expect, it } from 'vitest';
|
2
2
|
|
3
|
-
import { createSSEDataExtractor } from './protocol';
|
3
|
+
import { createSSEDataExtractor, createTokenSpeedCalculator } from './protocol';
|
4
4
|
|
5
5
|
describe('createSSEDataExtractor', () => {
|
6
6
|
// Helper function to convert string to Uint8Array
|
@@ -135,3 +135,69 @@ describe('createSSEDataExtractor', () => {
|
|
135
135
|
});
|
136
136
|
});
|
137
137
|
});
|
138
|
+
|
139
|
+
describe('createTokenSpeedCalculator', async () => {
|
140
|
+
// Mock the inputStartAt param supplied by the caller: 1000 ms in the past, so the measured ttft is non-zero
|
141
|
+
const inputStartAt = Date.now() - 1000;
|
142
|
+
|
143
|
+
// Helper function to process chunks through transformer
|
144
|
+
const processChunk = async (transformer: TransformStream, chunk: any) => {
|
145
|
+
const results: any[] = [];
|
146
|
+
const readable = new ReadableStream({
|
147
|
+
start(controller) {
|
148
|
+
controller.enqueue(chunk);
|
149
|
+
controller.close();
|
150
|
+
},
|
151
|
+
});
|
152
|
+
|
153
|
+
const writable = new WritableStream({
|
154
|
+
write(chunk) {
|
155
|
+
results.push(chunk);
|
156
|
+
},
|
157
|
+
});
|
158
|
+
|
159
|
+
await readable.pipeThrough(transformer).pipeTo(writable);
|
160
|
+
|
161
|
+
return results;
|
162
|
+
};
|
163
|
+
|
164
|
+
it('should calculate token speed correctly', async () => {
|
165
|
+
const chunks = [
|
166
|
+
{ data: '', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
|
167
|
+
{ data: 'hi', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
|
168
|
+
{ data: 'stop', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'stop' },
|
169
|
+
{
|
170
|
+
data: {
|
171
|
+
inputTextTokens: 9,
|
172
|
+
outputTextTokens: 1,
|
173
|
+
totalInputTokens: 9,
|
174
|
+
totalOutputTokens: 1,
|
175
|
+
totalTokens: 10,
|
176
|
+
},
|
177
|
+
id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy',
|
178
|
+
type: 'usage',
|
179
|
+
},
|
180
|
+
];
|
181
|
+
|
182
|
+
const transformer = createTokenSpeedCalculator((v) => v, { inputStartAt });
|
183
|
+
const results = await processChunk(transformer, chunks);
|
184
|
+
expect(results).toHaveLength(chunks.length + 1);
|
185
|
+
const speedChunk = results.slice(-1)[0];
|
186
|
+
expect(speedChunk.id).toBe('output_speed');
|
187
|
+
expect(speedChunk.type).toBe('speed');
|
188
|
+
expect(speedChunk.data.tps).not.toBeNaN();
|
189
|
+
expect(speedChunk.data.ttft).not.toBeNaN();
|
190
|
+
});
|
191
|
+
|
192
|
+
it('should not calculate token speed if no usage', async () => {
|
193
|
+
const chunks = [
|
194
|
+
{ data: '', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
|
195
|
+
{ data: 'hi', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'text' },
|
196
|
+
{ data: 'stop', id: 'chatcmpl-BKO1bogylHvMaYfETjTAzrCguYwZy', type: 'stop' },
|
197
|
+
];
|
198
|
+
|
199
|
+
const transformer = createTokenSpeedCalculator((v) => v, { inputStartAt });
|
200
|
+
const results = await processChunk(transformer, chunks);
|
201
|
+
expect(results).toHaveLength(chunks.length);
|
202
|
+
});
|
203
|
+
});
|
@@ -1,4 +1,4 @@
|
|
1
|
-
import { ModelTokensUsage } from '@/types/message';
|
1
|
+
import { ModelSpeed, ModelTokensUsage } from '@/types/message';
|
2
2
|
import { safeParseJSON } from '@/utils/safeParseJSON';
|
3
3
|
|
4
4
|
import { AgentRuntimeErrorType } from '../../error';
|
@@ -52,6 +52,8 @@ export interface StreamProtocolChunk {
|
|
52
52
|
| 'error'
|
53
53
|
// token usage
|
54
54
|
| 'usage'
|
55
|
+
// performance monitor
|
56
|
+
| 'speed'
|
55
57
|
// unknown data result
|
56
58
|
| 'data';
|
57
59
|
}
|
@@ -287,3 +289,46 @@ export const createSSEDataExtractor = () =>
|
|
287
289
|
}
|
288
290
|
},
|
289
291
|
});
|
292
|
+
|
293
|
+
export const TOKEN_SPEED_CHUNK_ID = 'output_speed';
|
294
|
+
|
295
|
+
/**
|
296
|
+
* Create a middleware to calculate the token generation speed
|
297
|
+
* @requires createSSEProtocolTransformer
|
298
|
+
*/
|
299
|
+
export const createTokenSpeedCalculator = (
|
300
|
+
transformer: (chunk: any, stack: StreamContext) => StreamProtocolChunk | StreamProtocolChunk[],
|
301
|
+
{ streamStack, inputStartAt }: { inputStartAt?: number; streamStack?: StreamContext } = {},
|
302
|
+
) => {
|
303
|
+
let outputStartAt: number | undefined;
|
304
|
+
|
305
|
+
const process = (chunk: StreamProtocolChunk) => {
|
306
|
+
let result = [chunk];
|
307
|
+
// if the chunk is the first text chunk, set as output start
|
308
|
+
if (!outputStartAt && chunk.type === 'text') outputStartAt = Date.now();
|
309
|
+
// if the chunk is the usage chunk, treat it as the end of output and emit the speed chunk
|
310
|
+
if (inputStartAt && outputStartAt && chunk.type === 'usage') {
|
311
|
+
const outputTokens = chunk.data?.totalOutputTokens || chunk.data?.outputTextTokens;
|
312
|
+
result.push({
|
313
|
+
data: {
|
314
|
+
tps: (outputTokens / (Date.now() - outputStartAt)) * 1000,
|
315
|
+
ttft: outputStartAt - inputStartAt,
|
316
|
+
} as ModelSpeed,
|
317
|
+
id: TOKEN_SPEED_CHUNK_ID,
|
318
|
+
type: 'speed',
|
319
|
+
});
|
320
|
+
}
|
321
|
+
return result;
|
322
|
+
};
|
323
|
+
|
324
|
+
return new TransformStream({
|
325
|
+
transform(chunk, controller) {
|
326
|
+
let result = transformer(chunk, streamStack || { id: '' });
|
327
|
+
if (!Array.isArray(result)) result = [result];
|
328
|
+
result.forEach((r) => {
|
329
|
+
const processed = process(r);
|
330
|
+
if (processed) processed.forEach((p) => controller.enqueue(p));
|
331
|
+
});
|
332
|
+
},
|
333
|
+
});
|
334
|
+
};
|
@@ -122,6 +122,17 @@ export default {
|
|
122
122
|
outputText: '文本输出',
|
123
123
|
outputTitle: '输出明细',
|
124
124
|
reasoning: '深度思考',
|
125
|
+
speed: {
|
126
|
+
tps: {
|
127
|
+
title: 'TPS',
|
128
|
+
tooltip:
|
129
|
+
'Tokens Per Second,TPS。指AI生成内容的平均速度(Token/秒),在接收到首个 Token 后开始计算。',
|
130
|
+
},
|
131
|
+
ttft: {
|
132
|
+
title: 'TTFT',
|
133
|
+
tooltip: 'Time To First Token,TTFT。指从您发送消息到客户端接收到首个 Token 的时间间隔。',
|
134
|
+
},
|
135
|
+
},
|
125
136
|
title: '生成明细',
|
126
137
|
total: '总计消耗',
|
127
138
|
},
|
@@ -576,7 +576,7 @@ export const generateAIChat: StateCreator<
|
|
576
576
|
},
|
577
577
|
onFinish: async (
|
578
578
|
content,
|
579
|
-
{ traceId, observationId, toolCalls, reasoning, grounding, usage },
|
579
|
+
{ traceId, observationId, toolCalls, reasoning, grounding, usage, speed },
|
580
580
|
) => {
|
581
581
|
// if there is traceId, update it
|
582
582
|
if (traceId) {
|
@@ -611,8 +611,8 @@ export const generateAIChat: StateCreator<
|
|
611
611
|
toolCalls,
|
612
612
|
reasoning: !!reasoning ? { ...reasoning, duration } : undefined,
|
613
613
|
search: !!grounding?.citations ? grounding : undefined,
|
614
|
-
metadata: usage,
|
615
614
|
imageList: finalImages.length > 0 ? finalImages : undefined,
|
615
|
+
metadata: speed ? { ...usage, ...speed } : usage,
|
616
616
|
});
|
617
617
|
},
|
618
618
|
onMessageHandle: async (chunk) => {
|
@@ -44,8 +44,16 @@ export interface ModelTokensUsage {
|
|
44
44
|
totalTokens?: number;
|
45
45
|
}
|
46
46
|
|
47
|
+
export interface ModelSpeed {
|
48
|
+
// tokens per second
|
49
|
+
tps?: number;
|
50
|
+
// time to first token
|
51
|
+
ttft?: number;
|
52
|
+
}
|
53
|
+
|
47
54
|
export interface MessageMetadata extends ModelTokensUsage {
|
48
55
|
tps?: number;
|
56
|
+
ttft?: number;
|
49
57
|
}
|
50
58
|
|
51
59
|
export type MessageRoleType = 'user' | 'system' | 'assistant' | 'tool';
|
@@ -11,6 +11,7 @@ import {
|
|
11
11
|
MessageToolCallChunk,
|
12
12
|
MessageToolCallSchema,
|
13
13
|
ModelReasoning,
|
14
|
+
ModelSpeed,
|
14
15
|
ModelTokensUsage,
|
15
16
|
} from '@/types/message';
|
16
17
|
import { ChatImageChunk } from '@/types/message/image';
|
@@ -29,6 +30,7 @@ export type OnFinishHandler = (
|
|
29
30
|
images?: ChatImageChunk[];
|
30
31
|
observationId?: string | null;
|
31
32
|
reasoning?: ModelReasoning;
|
33
|
+
speed?: ModelSpeed;
|
32
34
|
toolCalls?: MessageToolCall[];
|
33
35
|
traceId?: string | null;
|
34
36
|
type?: SSEFinishType;
|
@@ -41,6 +43,11 @@ export interface MessageUsageChunk {
|
|
41
43
|
usage: ModelTokensUsage;
|
42
44
|
}
|
43
45
|
|
46
|
+
export interface MessageSpeedChunk {
|
47
|
+
speed: ModelSpeed;
|
48
|
+
type: 'speed';
|
49
|
+
}
|
50
|
+
|
44
51
|
export interface MessageTextChunk {
|
45
52
|
text: string;
|
46
53
|
type: 'text';
|
@@ -82,7 +89,8 @@ export interface FetchSSEOptions {
|
|
82
89
|
| MessageReasoningChunk
|
83
90
|
| MessageGroundingChunk
|
84
91
|
| MessageUsageChunk
|
85
|
-
| MessageBase64ImageChunk
|
92
|
+
| MessageBase64ImageChunk
|
93
|
+
| MessageSpeedChunk,
|
86
94
|
) => void;
|
87
95
|
smoothing?: SmoothingParams | boolean;
|
88
96
|
}
|
@@ -342,6 +350,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
|
|
342
350
|
let grounding: GroundingSearch | undefined = undefined;
|
343
351
|
let usage: ModelTokensUsage | undefined = undefined;
|
344
352
|
let images: ChatImageChunk[] = [];
|
353
|
+
let speed: ModelSpeed | undefined = undefined;
|
345
354
|
|
346
355
|
await fetchEventSource(url, {
|
347
356
|
body: options.body,
|
@@ -433,6 +442,12 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
|
|
433
442
|
break;
|
434
443
|
}
|
435
444
|
|
445
|
+
case 'speed': {
|
446
|
+
speed = data;
|
447
|
+
options.onMessageHandle?.({ speed: data, type: 'speed' });
|
448
|
+
break;
|
449
|
+
}
|
450
|
+
|
436
451
|
case 'grounding': {
|
437
452
|
grounding = data;
|
438
453
|
options.onMessageHandle?.({ grounding: data, type: 'grounding' });
|
@@ -517,6 +532,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
|
|
517
532
|
images: images.length > 0 ? images : undefined,
|
518
533
|
observationId,
|
519
534
|
reasoning: !!thinking ? { content: thinking, signature: thinkingSignature } : undefined,
|
535
|
+
speed,
|
520
536
|
toolCalls,
|
521
537
|
traceId,
|
522
538
|
type: finishedType,
|