@wq-hook/volcano-react 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -32,8 +32,13 @@ var index_exports = {};
32
32
  __export(index_exports, {
33
33
  AudioProgressBar: () => AudioProgressBar_default,
34
34
  AudioWaveVisualizer: () => AudioWaveVisualizer_default,
35
+ StreamingTextSplitter: () => StreamingTextSplitter,
36
+ clearSessionAudioCache: () => clearSessionAudioCache,
37
+ findSessionCacheByText: () => findSessionCacheByText,
38
+ getSessionAudioCache: () => getSessionAudioCache,
35
39
  splitTextByDelimiters: () => splitTextByDelimiters,
36
40
  useMessageTTS: () => useMessageTTS,
41
+ useStreamTTS: () => useStreamTTS,
37
42
  useVolcanoASR: () => useVolcanoASR,
38
43
  useVolcanoTTS: () => useVolcanoTTS
39
44
  });
@@ -550,10 +555,12 @@ function useMessageTTS({
550
555
  const [isSynthesizing, setIsSynthesizing] = (0, import_react3.useState)(false);
551
556
  const [error, setErrorState] = (0, import_react3.useState)(null);
552
557
  const [progress, setProgress] = (0, import_react3.useState)(0);
553
- const [visualizationData, setVisualizationData] = (0, import_react3.useState)({
554
- frequencyData: new Uint8Array(0),
555
- timeDomainData: new Uint8Array(0)
556
- });
558
+ const [visualizationData, setVisualizationData] = (0, import_react3.useState)(
559
+ {
560
+ frequencyData: new Uint8Array(0),
561
+ timeDomainData: new Uint8Array(0)
562
+ }
563
+ );
557
564
  const instanceId = (0, import_react3.useRef)(
558
565
  `tts-${Date.now()}-${Math.random().toString(36).slice(2)}`
559
566
  ).current;
@@ -761,7 +768,11 @@ function useMessageTTS({
761
768
  console.error("Audio playback error:", e, audio.error);
762
769
  metricsCollector.record({
763
770
  name: "tts_error",
764
- labels: { error_code: "playback_error", voice, detail: audio.error?.message || String(audio.error?.code) },
771
+ labels: {
772
+ error_code: "playback_error",
773
+ voice,
774
+ detail: audio.error?.message || String(audio.error?.code)
775
+ },
765
776
  value: 1,
766
777
  timestamp: Date.now()
767
778
  });
@@ -779,7 +790,10 @@ function useMessageTTS({
779
790
  }
780
791
  };
781
792
  if (cachedData) {
782
- const totalSize = cachedData.reduce((acc, buf) => acc + buf.byteLength, 0);
793
+ const totalSize = cachedData.reduce(
794
+ (acc, buf) => acc + buf.byteLength,
795
+ 0
796
+ );
783
797
  metricsCollector.record({
784
798
  name: "tts_cache_hit",
785
799
  labels: { voice, speed },
@@ -797,7 +811,9 @@ function useMessageTTS({
797
811
  })
798
812
  );
799
813
  if (totalSize === 0) {
800
- console.warn("[useMessageTTS] Cached data is empty, falling back to stream");
814
+ console.warn(
815
+ "[useMessageTTS] Cached data is empty, falling back to stream"
816
+ );
801
817
  } else {
802
818
  const blob = new Blob(cachedData, { type: "audio/mpeg" });
803
819
  const url2 = URL.createObjectURL(blob);
@@ -816,7 +832,10 @@ function useMessageTTS({
816
832
  }
817
833
  console.log("[useMessageTTS] Cache miss, starting stream");
818
834
  clientRef.current = (0, import_tts2.WebsocketMSE)({ autoStartSession: true });
819
- const formattedText = import_volcano_sdk2.MarkdownFormatter.format(text).replace((0, import_emoji_regex2.default)(), "");
835
+ const formattedText = import_volcano_sdk2.MarkdownFormatter.format(text).replace(
836
+ (0, import_emoji_regex2.default)(),
837
+ ""
838
+ );
820
839
  const segments = splitTextByDelimiters(formattedText);
821
840
  const url = clientRef.current.start({
822
841
  url: buildFullUrl2(WS_URL, {
@@ -934,6 +953,14 @@ function useMessageTTS({
934
953
  console.warn(
935
954
  `[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
936
955
  );
956
+ if (clientRef.current) {
957
+ clientRef.current.close();
958
+ clientRef.current = null;
959
+ }
960
+ if (audioRef.current) {
961
+ audioRef.current.pause();
962
+ audioRef.current = null;
963
+ }
937
964
  executeTTS(text, fallbackVoice);
938
965
  } else {
939
966
  playFallback(text);
@@ -943,7 +970,7 @@ function useMessageTTS({
943
970
  );
944
971
  const play = (0, import_react3.useCallback)(
945
972
  (text) => {
946
- const voice = audioParams?.speaker || "default";
973
+ const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
947
974
  return executeTTS(text, voice);
948
975
  },
949
976
  [audioParams, executeTTS]
@@ -996,7 +1023,9 @@ function useMessageTTS({
996
1023
  if (audioRef.current) {
997
1024
  let duration = audioRef.current.duration;
998
1025
  if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
999
- duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
1026
+ duration = audioRef.current.buffered.end(
1027
+ audioRef.current.buffered.length - 1
1028
+ );
1000
1029
  }
1001
1030
  if (isFinite(duration) && duration > 0) {
1002
1031
  const time = percentage / 100 * duration;
@@ -1025,8 +1054,725 @@ function useMessageTTS({
1025
1054
  };
1026
1055
  }
1027
1056
 
1028
- // src/components/AudioWaveVisualizer.tsx
1057
+ // src/tts/useStreamTTS.ts
1058
+ var import_tts3 = require("@wq-hook/volcano-sdk/tts");
1029
1059
  var import_react4 = require("react");
1060
+
1061
+ // src/tts/StreamingTextSplitter.ts
1062
+ var import_volcano_sdk3 = require("@wq-hook/volcano-sdk");
1063
+ var import_emoji_regex3 = __toESM(require("emoji-regex"));
1064
/**
 * Incrementally splits streamed text into segments suitable for TTS.
 *
 * Text is fed in through `onChunk`. A segment is produced whenever a newline
 * is seen, or when the buffered text reaches `maxLength` (in which case the
 * buffer is force-split, preferably at a sentence boundary). Each produced
 * segment is Markdown-formatted, stripped of emoji, appended to `segments`
 * and reported through `onSegmentComplete`. `complete()` flushes whatever is
 * left and reports the full list through `onAllComplete`.
 */
var StreamingTextSplitter = class {
  /**
   * @param {object} [options]
   * @param {number} [options.maxLength=150] - Buffer/segment length that triggers a forced split.
   * @param {number} [options.minLength=10] - A sentence-boundary split is only used past this index.
   * @param {(segment: object) => void} [options.onSegmentComplete] - Called for every produced segment.
   * @param {(segments: object[]) => void} [options.onAllComplete] - Called once from `complete()`.
   */
  constructor(options = {}) {
    /** Text received so far that has not been turned into a segment yet. */
    this.buffer = "";
    /** Index assigned to the next produced segment. */
    this.segmentIndex = 0;
    /** Every segment produced since construction or the last `reset()`. */
    this.segments = [];
    /** Set by `complete()`; further chunks are ignored until `reset()`. */
    this.isCompleted = false;
    this.maxLength = options.maxLength || 150;
    this.minLength = options.minLength || 10;
    this.onSegmentComplete = options.onSegmentComplete;
    this.onAllComplete = options.onAllComplete;
  }

  /**
   * Receives one chunk of streamed text.
   * @param {string} chunk - Text chunk; empty chunks and chunks after `complete()` are ignored.
   */
  onChunk(chunk) {
    if (!chunk || this.isCompleted) return;
    this.buffer += chunk;
    if (this.detectBoundary(chunk)) {
      // Drain every complete line. The previous implementation returned early
      // when the buffer started with "\n", which left later, already complete
      // lines sitting in the buffer until another boundary arrived.
      this.drainCompleteLines();
    }
  }

  /**
   * Decides whether the latest chunk created a segment boundary. As a side
   * effect, force-splits the buffer when it has reached `maxLength`.
   * @param {string} chunk - The most recently received chunk.
   * @returns {boolean} True when the buffer should be scanned for complete lines.
   */
  detectBoundary(chunk) {
    const isOverLength = this.buffer.length >= this.maxLength;
    if (isOverLength) {
      this.forceSplitAtSentenceBoundary();
    }
    return isOverLength || chunk.includes("\n");
  }

  /**
   * Splits an over-long buffer in two, flushing the first part as a segment
   * and keeping the second part buffered. The split happens after the last
   * sentence-ending mark when one exists past `minLength`, otherwise at the
   * midpoint of the buffer.
   */
  forceSplitAtSentenceBoundary() {
    const content = this.buffer;
    // ASCII and full-width sentence enders are both recognised.
    const sentenceEnders = /[。?!\uFF1F\uFF01]/g;
    let lastMatch = null;
    let match = null;
    while ((match = sentenceEnders.exec(content)) !== null) {
      lastMatch = match;
    }

    let splitPoint;
    if (lastMatch && lastMatch.index > this.minLength) {
      splitPoint = lastMatch.index + 1;
    } else {
      splitPoint = Math.floor(content.length / 2);
      // Never cut between the two halves of a surrogate pair: that would
      // leave a lone surrogate at the end of one segment and the start of
      // the next.
      const prev = content.charCodeAt(splitPoint - 1);
      const next = content.charCodeAt(splitPoint);
      const splitsPair = prev >= 0xd800 && prev <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
      if (splitsPair) {
        splitPoint += 1;
      }
    }

    const remainder = content.substring(splitPoint);
    this.buffer = content.substring(0, splitPoint);
    this.flushSegment();
    this.buffer = remainder;
  }

  /**
   * Turns the given text into one or more segments without touching the buffer.
   * @param {string} bufferToFlush - Text to emit as segment(s).
   */
  flushSegmentWithBuffer(bufferToFlush) {
    this.emitSegments(bufferToFlush);
  }

  /**
   * Turns the current (trimmed) buffer into one or more segments and clears it.
   */
  flushSegment() {
    this.emitSegments(this.buffer.trim());
    this.buffer = "";
  }

  /**
   * Shared implementation behind `flushSegment` and `flushSegmentWithBuffer`:
   * filters noise, formats the text and records/report the resulting segments.
   * @param {string} content - Raw text of the prospective segment.
   */
  emitSegments(content) {
    // Whitespace-only input carries nothing to speak.
    if (!content || !content.trim()) return;
    // Very short runs without any letter or digit (e.g. "--") are dropped.
    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
    if (isPureSymbols && content.length < 3) return;

    const formattedContent = import_volcano_sdk3.MarkdownFormatter.format(content).replace((0, import_emoji_regex3.default)(), "");
    if (!formattedContent) return;

    const subSegments = formattedContent.length > this.maxLength ? this.splitLongSegment(formattedContent) : [formattedContent];
    for (const subSegment of subSegments) {
      if (!subSegment) continue;
      const segment = {
        index: this.segmentIndex++,
        content: subSegment,
        length: subSegment.length,
        sent: false
      };
      this.segments.push(segment);
      this.onSegmentComplete?.(segment);
    }
  }

  /**
   * Flushes every newline-terminated line currently in the buffer, leaving
   * only the trailing partial line (if any) buffered. Empty lines are skipped.
   */
  drainCompleteLines() {
    let newlineIndex = this.buffer.indexOf("\n");
    while (newlineIndex !== -1) {
      const line = this.buffer.substring(0, newlineIndex);
      this.buffer = this.buffer.substring(newlineIndex + 1);
      if (line) {
        this.flushSegmentWithBuffer(line);
      }
      newlineIndex = this.buffer.indexOf("\n");
    }
  }

  /**
   * Splits an over-long segment at punctuation, or at `maxLength` when no
   * punctuation is reached in time.
   * @param {string} segment - Text longer than `maxLength`.
   * @returns {string[]} Non-empty pieces in original order.
   */
  splitLongSegment(segment) {
    const result = [];
    let current = "";
    // Iterating the string yields whole code points, so pieces never end in
    // a lone surrogate.
    for (const char of segment) {
      current += char;
      const isPunctuation = /[。?!,\uFF1F\uFF01\uFF0C]/.test(char);
      if ((isPunctuation && current.length <= this.maxLength) || current.length >= this.maxLength) {
        result.push(current);
        current = "";
      }
    }
    if (current) {
      result.push(current);
    }
    return result.filter((s) => s.length > 0);
  }

  /**
   * Marks the stream as finished, flushes all remaining buffered text and
   * reports the full segment list through `onAllComplete`. Idempotent.
   */
  complete() {
    if (this.isCompleted) return;
    this.isCompleted = true;
    this.drainCompleteLines();
    if (this.buffer.trim()) {
      this.flushSegment();
    }
    this.onAllComplete?.(this.segments);
  }

  /**
   * Restores the initial state so the instance can be reused for a new stream.
   */
  reset() {
    this.buffer = "";
    this.segmentIndex = 0;
    this.segments = [];
    this.isCompleted = false;
  }

  /**
   * @returns {string} Text that is buffered but not yet segmented.
   */
  getBuffer() {
    return this.buffer;
  }

  /**
   * @returns {object[]} The live list of produced segments.
   */
  getSegments() {
    return this.segments;
  }

  /**
   * @returns {{bufferLength: number, segmentCount: number, totalChars: number}}
   *   Current buffer length, number of segments and their summed length.
   */
  getStats() {
    return {
      bufferLength: this.buffer.length,
      segmentCount: this.segments.length,
      totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
    };
  }
};
1299
+
1300
+ // src/tts/useStreamTTS.ts
1301
// WebSocket endpoint that useStreamTTS connects to.
var WS_URL2 = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";

// instanceId -> { pause } for hook instances that are currently playing;
// used so that starting one instance can pause all the others.
var activeInstances2 = /* @__PURE__ */ new Map();

// instanceId -> { streamText, audioBuffers, timestamp, voice, speed } for the
// most recently finished session of each hook instance.
var sessionAudioCache = /* @__PURE__ */ new Map();
1304
/**
 * Appends query parameters to a URL.
 *
 * Both keys and values are percent-encoded. Parameters whose value is `null`
 * or `undefined` are omitted instead of being serialised as the literal
 * strings "null"/"undefined". When nothing is left to append, the URL is
 * returned unchanged (no dangling "?").
 *
 * @param {string} url - Base URL without a query string.
 * @param {Record<string, unknown>} [params] - Own enumerable properties become query parameters.
 * @returns {string} The URL followed by `?key=value&...` when there are parameters.
 */
function buildFullUrl3(url, params) {
  const pairs = [];
  for (const [key, value] of Object.entries(params ?? {})) {
    if (value == null) continue;
    pairs.push(`${encodeURIComponent(key)}=${encodeURIComponent(value)}`);
  }
  return pairs.length > 0 ? `${url}?${pairs.join("&")}` : url;
}
1313
/**
 * React hook that synthesises and plays speech for text that arrives as a
 * stream (e.g. token-by-token model output).
 *
 * Typical call order: `connect()` opens the WebSocket and prepares the audio
 * element, `onMessage(chunk)` is called for every text chunk, and
 * `finishStream()` is called once the text stream has ended. Incoming text is
 * split into segments by a StreamingTextSplitter; segments are queued and sent
 * once the TTS session has started. When the session finishes, the received
 * audio buffers are stored in TTSCache and in the module-level session cache.
 *
 * @param {object} options
 * @param {object} options.ttsConfig - Reads `token`, `appid`, and optional `resourceId` / `namespace`.
 * @param {object} [options.audioParams] - Reads `speaker`, `sample_rate`, `format`, `speech_rate`, `pitch_rate`, `loudness_rate`.
 * @param {boolean} [options.autoPlay=true] - Try to start playback as soon as the stream URL is attached.
 * @param {object} [options.metricsCollector] - Object with a `record(metric)` method; a NoopMetricsCollector is used when omitted.
 * @param {Function} [options.onPlayStart]
 * @param {Function} [options.onPlayPause]
 * @param {Function} [options.onPlayResume]
 * @param {Function} [options.onPlayEnd]
 * @param {(error: Error) => void} [options.onError]
 * @param {object} [options.visualization] - Reads `enabled`, `fftSize`, `refreshInterval`.
 * @param {number} [options.maxSegmentLength=150] - Passed to the splitter as `maxLength`.
 * @returns {object} Playback/connection state plus the control functions
 *   `connect`, `onMessage`, `finishStream`, `pause`, `resume`, `stop`, `seek`,
 *   `getFrequencyData` and `getTimeDomainData`.
 */
function useStreamTTS({
  ttsConfig,
  audioParams,
  autoPlay = true,
  metricsCollector,
  onPlayStart,
  onPlayPause,
  onPlayResume,
  onPlayEnd,
  onError,
  visualization,
  maxSegmentLength = 150
}) {
  const { useCallback, useEffect, useRef, useState } = import_react4;

  const [isConnected, setIsConnected] = useState(false);
  const [isSessionStarted, setIsSessionStarted] = useState(false);
  const [isSynthesizing, setIsSynthesizing] = useState(false);
  const [isPlaying, setIsPlaying] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  const [error, setErrorState] = useState(null);
  const [streamText, setStreamText] = useState("");
  const [progress, setProgress] = useState(0);
  const [visualizationData, setVisualizationData] = useState({
    frequencyData: new Uint8Array(0),
    timeDomainData: new Uint8Array(0)
  });

  const instanceId = useRef(`tts-stream-${Date.now()}-${Math.random().toString(36).slice(2)}`).current;
  const clientRef = useRef(null);
  const audioRef = useRef(null);
  const audioContextRef = useRef(null);
  const analyserRef = useRef(null);
  const sourceRef = useRef(null);
  const audioUrlRef = useRef(null);
  const streamTextRef = useRef("");
  // Ref mirrors of state, readable synchronously from callbacks.
  const isConnectedRef = useRef(false);
  const isSessionStartedRef = useRef(false);
  // True once startSession() has been requested for the current connection.
  const calledSessionStartedRef = useRef(false);
  const splitterRef = useRef(null);
  const segmentQueueRef = useRef([]);
  const isSendingRef = useRef(false);
  const sessionAudioBuffersRef = useRef([]);
  const isStreamFinishedRef = useRef(false);
  const isSessionFinishedRef = useRef(false);
  // Resolver of the promise a pending finishStream() call is awaiting.
  const resolveAllSegmentsSentRef = useRef(null);
  const currentVoiceRef = useRef("");

  // A default parameter would allocate a new collector on every render and
  // give `connect` a new identity each time; create the fallback only once.
  const fallbackCollectorRef = useRef(null);
  if (metricsCollector == null && fallbackCollectorRef.current === null) {
    fallbackCollectorRef.current = new NoopMetricsCollector();
  }
  const collector = metricsCollector ?? fallbackCollectorRef.current;

  /** Lazily builds the AudioContext -> analyser -> destination graph for the current audio element. */
  const initAudioContext = useCallback(() => {
    if (!audioRef.current) return;
    if (!audioContextRef.current) {
      const AudioContextClass = window.AudioContext || window.webkitAudioContext;
      audioContextRef.current = new AudioContextClass();
    }
    if (audioContextRef.current.state === "suspended") {
      Promise.resolve(audioContextRef.current.resume()).catch((e) => {
        console.warn("[useStreamTTS] AudioContext resume failed:", e);
      });
    }
    if (!analyserRef.current) {
      analyserRef.current = audioContextRef.current.createAnalyser();
      analyserRef.current.fftSize = visualization?.fftSize || 256;
    }
    if (!sourceRef.current) {
      try {
        sourceRef.current = audioContextRef.current.createMediaElementSource(audioRef.current);
        sourceRef.current.connect(analyserRef.current);
        analyserRef.current.connect(audioContextRef.current.destination);
      } catch (e) {
        // Visualisation is optional; playback continues without it.
        console.warn("[useStreamTTS] Failed to attach audio analyser:", e);
      }
    }
  }, [visualization?.fftSize]);

  /** Releases the object URL, detaches and stops the audio element, and disconnects the source node. */
  const cleanupAudio = useCallback(() => {
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current);
      audioUrlRef.current = null;
    }
    if (audioRef.current) {
      audioRef.current.onerror = null;
      audioRef.current.onended = null;
      audioRef.current.onpause = null;
      audioRef.current.onplay = null;
      audioRef.current.ontimeupdate = null;
      audioRef.current.pause();
      audioRef.current.src = "";
      audioRef.current = null;
    }
    if (sourceRef.current) {
      try {
        sourceRef.current.disconnect();
      } catch (e) {
        // Best effort: the node may already be disconnected.
      }
      sourceRef.current = null;
    }
  }, []);

  /** Pauses every other registered hook instance. */
  const stopOthers = useCallback(() => {
    activeInstances2.forEach((instance, id) => {
      if (id !== instanceId) {
        instance.pause();
      }
    });
  }, [instanceId]);

  const pause = useCallback(() => {
    if (audioRef.current) {
      audioRef.current.pause();
    }
    setIsPaused(true);
    setIsPlaying(false);
    onPlayPause?.();
  }, [onPlayPause]);

  const resume = useCallback(() => {
    stopOthers();
    if (audioRef.current) {
      const playback = audioRef.current.play();
      // play() returns a promise that rejects when playback is blocked or
      // interrupted; handle it so it does not surface as an unhandled rejection.
      if (playback && typeof playback.catch === "function") {
        playback.catch((e) => {
          console.warn("[useStreamTTS] Resume failed:", e);
        });
      }
    }
    setIsPaused(false);
    setIsPlaying(true);
    onPlayResume?.();
    activeInstances2.set(instanceId, { pause });
  }, [stopOthers, instanceId, pause, onPlayResume]);

  /**
   * Sends the next queued segment, then schedules itself again. When the
   * queue is empty and the text stream has finished, finishes the session and
   * releases a pending finishStream() call.
   */
  const sendNextSegment = useCallback(() => {
    if (!clientRef.current || !isSessionStartedRef.current || isSendingRef.current || isSessionFinishedRef.current) {
      return;
    }
    if (segmentQueueRef.current.length === 0) {
      if (isStreamFinishedRef.current && !isSessionFinishedRef.current) {
        console.log("[useStreamTTS] All segments sent, finishing session");
        isSessionFinishedRef.current = true;
        clientRef.current.finishSession();
        const resolvePending = resolveAllSegmentsSentRef.current;
        resolveAllSegmentsSentRef.current = null;
        resolvePending?.();
      }
      return;
    }
    isSendingRef.current = true;
    const segment = segmentQueueRef.current.shift();
    console.log(`[useStreamTTS] Sending segment ${segment.index}: ${segment.content.substring(0, 30)}...`);
    clientRef.current.sendText(segment.content);
    segment.sent = true;
    isSendingRef.current = false;
    setTimeout(() => sendNextSegment(), 0);
  }, []);

  /** Closes the connection, tears down audio and resets all per-session state. */
  const stop = useCallback(() => {
    if (clientRef.current) {
      clientRef.current.close();
      clientRef.current = null;
    }
    cleanupAudio();
    setIsConnected(false);
    isConnectedRef.current = false;
    setIsSessionStarted(false);
    isSessionStartedRef.current = false;
    calledSessionStartedRef.current = false;
    setIsPlaying(false);
    setIsPaused(false);
    setIsSynthesizing(false);
    setProgress(0);
    activeInstances2.delete(instanceId);
    streamTextRef.current = "";
    setStreamText("");
    segmentQueueRef.current = [];
    isSendingRef.current = false;
    sessionAudioBuffersRef.current = [];
    isStreamFinishedRef.current = false;
    isSessionFinishedRef.current = false;
    splitterRef.current?.reset();
    // Nothing will drain the queue any more; release a caller that is still
    // awaiting finishStream() instead of leaving it pending forever.
    const resolvePending = resolveAllSegmentsSentRef.current;
    resolveAllSegmentsSentRef.current = null;
    resolvePending?.();
  }, [cleanupAudio, instanceId]);

  /** Resets state, creates the audio element, splitter and WebSocket client, and starts the stream. */
  const connect = useCallback(async () => {
    stop();
    setErrorState(null);
    setProgress(0);
    sessionAudioBuffersRef.current = [];
    isStreamFinishedRef.current = false;
    streamTextRef.current = "";
    setStreamText("");
    segmentQueueRef.current = [];
    isSendingRef.current = false;
    isSessionStartedRef.current = false;
    calledSessionStartedRef.current = false;
    setIsSessionStarted(false);
    const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
    currentVoiceRef.current = voice;
    const startTime = Date.now();
    collector.record({
      name: "tts_request",
      labels: { voice, text_length: 0 },
      value: 1,
      timestamp: startTime
    });
    try {
      const audio = new Audio();
      audio.crossOrigin = "anonymous";
      audioRef.current = audio;
      audio.onplay = () => {
        setIsPlaying(true);
        setIsPaused(false);
        onPlayStart?.();
        initAudioContext();
        activeInstances2.set(instanceId, { pause });
      };
      audio.onended = () => {
        setIsPlaying(false);
        setIsPaused(false);
        onPlayEnd?.();
        activeInstances2.delete(instanceId);
      };
      audio.onerror = (e) => {
        console.error("[useStreamTTS] Audio playback error:", e, audio.error);
        setErrorState(audio.error?.message || "Audio playback error");
        onError?.(new Error(audio.error?.message || "Audio playback error"));
      };
      audio.ontimeupdate = () => {
        let duration = audio.duration;
        // While streaming, duration is not finite; use the end of the last
        // buffered range instead.
        if (!isFinite(duration) && audio.buffered.length > 0) {
          duration = audio.buffered.end(audio.buffered.length - 1);
        }
        if (isFinite(duration) && duration > 0) {
          setProgress(audio.currentTime / duration * 100);
        }
      };
      clientRef.current = (0, import_tts3.WebsocketMSE)({ autoStartSession: false });
      splitterRef.current = new StreamingTextSplitter({
        maxLength: maxSegmentLength,
        onSegmentComplete: (segment) => {
          segmentQueueRef.current.push(segment);
          console.log(`[useStreamTTS] Segment ${segment.index} queued (${segment.length} chars)`);
          if (isSessionStartedRef.current) {
            sendNextSegment();
          }
        },
        onAllComplete: () => {
          console.log(`[useStreamTTS] All segments completed, total: ${segmentQueueRef.current.length} in queue`);
        }
      });
      const url = clientRef.current.start({
        url: buildFullUrl3(WS_URL2, {
          api_access_key: `Jwt; ${ttsConfig.token}`,
          api_app_key: ttsConfig.appid,
          api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
        }),
        config: {
          user: {
            uid: `req-${Date.now()}`
          },
          namespace: ttsConfig.namespace || "BidirectionalTTS",
          req_params: {
            speaker: voice,
            audio_params: {
              sample_rate: audioParams?.sample_rate || 24e3,
              format: audioParams?.format || "mp3",
              speech_rate: audioParams?.speech_rate,
              pitch_rate: audioParams?.pitch_rate,
              loudness_rate: audioParams?.loudness_rate
            },
            additions: JSON.stringify({
              enable_language_detector: true,
              disable_markdown_filter: true,
              enable_latex_tn: true
            })
          }
        },
        // ===== Key callbacks =====
        onStart: () => {
          setIsConnected(true);
          isConnectedRef.current = true;
          console.log("[useStreamTTS] WebSocket connected, waiting for text...");
          // Text may have arrived through onMessage() before the socket was
          // open; in that case nothing has requested a session yet, and if no
          // further chunk arrives nothing ever would.
          if (streamTextRef.current && !calledSessionStartedRef.current && !isSessionStartedRef.current && clientRef.current) {
            console.log("[useStreamTTS] Text already buffered, starting session...");
            calledSessionStartedRef.current = true;
            clientRef.current.startSession();
          }
        },
        onSessionStarted: () => {
          setIsSessionStarted(true);
          isSessionStartedRef.current = true;
          console.log("[useStreamTTS] Session started, can send text now");
          // Always invoke: with an empty queue this is a no-op unless the text
          // stream has already finished, in which case it finishes the session.
          sendNextSegment();
        },
        onMessage: (data) => {
          setIsSynthesizing(true);
          if (sessionAudioBuffersRef.current.length === 0) {
            collector.record({
              name: "tts_latency",
              labels: { stage: "first_packet", voice },
              value: Date.now() - startTime,
              timestamp: Date.now()
            });
          }
          // Keep a private copy of each audio packet for caching.
          const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
          sessionAudioBuffersRef.current.push(buffer);
        },
        onSessionFinished: () => {
          setIsSynthesizing(false);
          setIsSessionStarted(false);
          isSessionStartedRef.current = false;
          calledSessionStartedRef.current = false;
          if (sessionAudioBuffersRef.current.length > 0 && streamTextRef.current) {
            const speed = audioParams?.speech_rate || 0;
            const cacheKey = TTSCache.generateKey(streamTextRef.current, voice, speed);
            TTSCache.set(cacheKey, [...sessionAudioBuffersRef.current]);
            sessionAudioCache.set(instanceId, {
              streamText: streamTextRef.current,
              audioBuffers: [...sessionAudioBuffersRef.current],
              timestamp: Date.now(),
              voice,
              speed
            });
            console.log(`[useStreamTTS] Session finished, cached ${sessionAudioBuffersRef.current.length} audio buffers`);
          }
          collector.record({
            name: "tts_synthesis_finished",
            labels: { voice, text_length: streamTextRef.current.length },
            value: Date.now() - startTime,
            timestamp: Date.now()
          });
        },
        onError: (err) => {
          console.error("[useStreamTTS] TTS error:", err);
          setErrorState(err.msg || "TTS error");
          onError?.(new Error(err.msg || "TTS error"));
          setIsSynthesizing(false);
        }
      });
      audioUrlRef.current = url;
      audio.src = url;
      if (autoPlay) {
        try {
          await audio.play();
        } catch (e) {
          console.warn("[useStreamTTS] Autoplay blocked:", e);
        }
      }
    } catch (err) {
      console.error("[useStreamTTS] Connect error:", err);
      setErrorState(String(err));
      onError?.(err instanceof Error ? err : new Error(String(err)));
    }
  }, [
    ttsConfig,
    audioParams,
    autoPlay,
    stop,
    instanceId,
    onPlayStart,
    onPlayEnd,
    initAudioContext,
    pause,
    collector,
    maxSegmentLength,
    sendNextSegment,
    onError
  ]);

  /** Feeds one chunk of streamed text; requests the session on the first chunk after connecting. */
  const onMessage = useCallback((chunk) => {
    if (!chunk) return;
    streamTextRef.current += chunk;
    setStreamText(streamTextRef.current);
    if (!calledSessionStartedRef.current && !isSessionStartedRef.current && clientRef.current && isConnectedRef.current) {
      console.log("[useStreamTTS] First text received, starting session...");
      calledSessionStartedRef.current = true;
      clientRef.current.startSession();
    }
    splitterRef.current?.onChunk(chunk);
  }, []);

  /**
   * Signals the end of the text stream. Resolves once every queued segment
   * has been sent and the session has been asked to finish (or once `stop()`
   * is called in the meantime).
   */
  const finishStream = useCallback(async () => {
    isStreamFinishedRef.current = true;
    splitterRef.current?.complete();
    console.log(`[useStreamTTS] Stream finished, ${segmentQueueRef.current.length} segments remaining in queue`);
    if (segmentQueueRef.current.length > 0 || isSendingRef.current) {
      await new Promise((resolve) => {
        resolveAllSegmentsSentRef.current = resolve;
      });
    } else if (clientRef.current && isSessionStartedRef.current && !isSessionFinishedRef.current) {
      isSessionFinishedRef.current = true;
      clientRef.current.finishSession();
    }
  }, []);

  /** Seeks to `percentage` (0-100) of the known or currently buffered duration. */
  const seek = useCallback((percentage) => {
    if (audioRef.current) {
      let duration = audioRef.current.duration;
      if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
        duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
      }
      if (isFinite(duration) && duration > 0) {
        const time = percentage / 100 * duration;
        if (isFinite(time)) {
          audioRef.current.currentTime = time;
          setProgress(percentage);
        }
      }
    }
  }, []);

  const getFrequencyData = useCallback(() => {
    if (!analyserRef.current) return new Uint8Array(0);
    const dataArray = new Uint8Array(analyserRef.current.frequencyBinCount);
    analyserRef.current.getByteFrequencyData(dataArray);
    return dataArray;
  }, []);

  const getTimeDomainData = useCallback(() => {
    if (!analyserRef.current) return new Uint8Array(0);
    const dataArray = new Uint8Array(analyserRef.current.frequencyBinCount);
    analyserRef.current.getByteTimeDomainData(dataArray);
    return dataArray;
  }, []);

  // While playing, publish analyser snapshots at most once per refreshInterval ms.
  useEffect(() => {
    if (!visualization?.enabled) return;
    let animId;
    let lastUpdate = 0;
    const interval = visualization.refreshInterval || 0;
    const update = (timestamp) => {
      if (isPlaying && !isPaused) {
        if (timestamp - lastUpdate >= interval) {
          setVisualizationData({
            frequencyData: getFrequencyData(),
            timeDomainData: getTimeDomainData()
          });
          lastUpdate = timestamp;
        }
        animId = requestAnimationFrame(update);
      }
    };
    if (isPlaying && !isPaused) {
      animId = requestAnimationFrame(update);
    }
    return () => {
      if (animId) cancelAnimationFrame(animId);
    };
  }, [isPlaying, isPaused, visualization, getFrequencyData, getTimeDomainData]);

  // Tear everything down on unmount.
  useEffect(() => {
    return () => {
      stop();
      const audioContext = audioContextRef.current;
      if (audioContext) {
        // Drop the references so a later initAudioContext() builds a fresh
        // graph instead of reusing a closed context and its analyser.
        audioContextRef.current = null;
        analyserRef.current = null;
        if (audioContext.state !== "closed") {
          Promise.resolve(audioContext.close()).catch((e) => {
            console.warn("[useStreamTTS] AudioContext close failed:", e);
          });
        }
      }
    };
  }, [stop]);

  return {
    isConnected,
    isSessionStarted,
    isSynthesizing,
    isPlaying,
    isPaused,
    error,
    streamText,
    progress,
    connect,
    onMessage,
    finishStream,
    pause,
    resume,
    stop,
    seek,
    getFrequencyData,
    getTimeDomainData,
    visualizationData
  };
}
1759
/**
 * Looks up the cached session audio stored under a hook instance id.
 * @param {string} instanceId - Key the entry was stored under.
 * @returns {object | undefined} The cached entry, or `undefined` when none exists.
 */
function getSessionAudioCache(instanceId) {
  const cachedEntry = sessionAudioCache.get(instanceId);
  return cachedEntry;
}
1762
/**
 * Removes the cached session audio stored under a hook instance id.
 * Does nothing when no entry exists.
 * @param {string} instanceId - Key of the entry to remove.
 * @returns {void}
 */
function clearSessionAudioCache(instanceId) {
  if (sessionAudioCache.has(instanceId)) {
    sessionAudioCache.delete(instanceId);
  }
}
1765
/**
 * Finds the first cached session whose text, voice and speed all match
 * exactly (strict equality).
 * @param {string} streamText - Full text of the session.
 * @param {string} voice - Voice the session was synthesised with.
 * @param {number} speed - Speech rate the session was synthesised with.
 * @returns {object | undefined} The matching cache entry, or `undefined`.
 */
function findSessionCacheByText(streamText, voice, speed) {
  const isMatch = (candidate) =>
    candidate.streamText === streamText &&
    candidate.voice === voice &&
    candidate.speed === speed;
  return Array.from(sessionAudioCache.values()).find(isMatch);
}
1773
+
1774
+ // src/components/AudioWaveVisualizer.tsx
1775
+ var import_react5 = require("react");
1030
1776
  var import_jsx_runtime = require("react/jsx-runtime");
1031
1777
  var AudioWaveVisualizer = ({
1032
1778
  isPlaying,
@@ -1041,8 +1787,8 @@ var AudioWaveVisualizer = ({
1041
1787
  className,
1042
1788
  styleObj
1043
1789
  }) => {
1044
- const canvasRef = (0, import_react4.useRef)(null);
1045
- const requestRef = (0, import_react4.useRef)(null);
1790
+ const canvasRef = (0, import_react5.useRef)(null);
1791
+ const requestRef = (0, import_react5.useRef)(null);
1046
1792
  const progressBackground = Array.isArray(color) ? `linear-gradient(90deg, ${color[0]}, ${color[1]})` : color;
1047
1793
  const textColor = Array.isArray(color) ? color[0] : color;
1048
1794
  const draw = () => {
@@ -1107,7 +1853,7 @@ var AudioWaveVisualizer = ({
1107
1853
  requestRef.current = requestAnimationFrame(draw);
1108
1854
  }
1109
1855
  };
1110
- (0, import_react4.useEffect)(() => {
1856
+ (0, import_react5.useEffect)(() => {
1111
1857
  if (isPlaying && !isPaused) {
1112
1858
  requestRef.current = requestAnimationFrame(draw);
1113
1859
  } else {
@@ -1151,7 +1897,7 @@ var AudioWaveVisualizer = ({
1151
1897
  var AudioWaveVisualizer_default = AudioWaveVisualizer;
1152
1898
 
1153
1899
  // src/components/AudioProgressBar.tsx
1154
- var import_react5 = require("react");
1900
+ var import_react6 = require("react");
1155
1901
  var import_jsx_runtime2 = require("react/jsx-runtime");
1156
1902
  var AudioProgressBar = ({
1157
1903
  progress,
@@ -1164,16 +1910,16 @@ var AudioProgressBar = ({
1164
1910
  className,
1165
1911
  style
1166
1912
  }) => {
1167
- const progressBarRef = (0, import_react5.useRef)(null);
1168
- const containerRef = (0, import_react5.useRef)(null);
1169
- const progressTextRef = (0, import_react5.useRef)(null);
1170
- const thumbRef = (0, import_react5.useRef)(null);
1171
- const displayedProgress = (0, import_react5.useRef)(0);
1172
- const requestRef = (0, import_react5.useRef)(null);
1173
- const isDragging = (0, import_react5.useRef)(false);
1174
- const isHovering = (0, import_react5.useRef)(false);
1175
- const isTouch = (0, import_react5.useRef)(false);
1176
- (0, import_react5.useEffect)(() => {
1913
+ const progressBarRef = (0, import_react6.useRef)(null);
1914
+ const containerRef = (0, import_react6.useRef)(null);
1915
+ const progressTextRef = (0, import_react6.useRef)(null);
1916
+ const thumbRef = (0, import_react6.useRef)(null);
1917
+ const displayedProgress = (0, import_react6.useRef)(0);
1918
+ const requestRef = (0, import_react6.useRef)(null);
1919
+ const isDragging = (0, import_react6.useRef)(false);
1920
+ const isHovering = (0, import_react6.useRef)(false);
1921
+ const isTouch = (0, import_react6.useRef)(false);
1922
+ (0, import_react6.useEffect)(() => {
1177
1923
  const match = window.matchMedia("(pointer: coarse)");
1178
1924
  isTouch.current = match.matches;
1179
1925
  if (isTouch.current && thumbRef.current) {
@@ -1212,7 +1958,7 @@ var AudioProgressBar = ({
1212
1958
  requestRef.current = requestAnimationFrame(animate);
1213
1959
  }
1214
1960
  };
1215
- (0, import_react5.useEffect)(() => {
1961
+ (0, import_react6.useEffect)(() => {
1216
1962
  requestRef.current = requestAnimationFrame(animate);
1217
1963
  return () => {
1218
1964
  if (requestRef.current) {
@@ -1399,8 +2145,13 @@ var AudioProgressBar_default = AudioProgressBar;
1399
2145
  0 && (module.exports = {
1400
2146
  AudioProgressBar,
1401
2147
  AudioWaveVisualizer,
2148
+ StreamingTextSplitter,
2149
+ clearSessionAudioCache,
2150
+ findSessionCacheByText,
2151
+ getSessionAudioCache,
1402
2152
  splitTextByDelimiters,
1403
2153
  useMessageTTS,
2154
+ useStreamTTS,
1404
2155
  useVolcanoASR,
1405
2156
  useVolcanoTTS
1406
2157
  });