@wq-hook/volcano-react 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +138 -40
- package/dist/index.d.ts +138 -40
- package/dist/index.js +1182 -1107
- package/dist/index.mjs +1181 -1104
- package/package.json +2 -2
package/dist/index.mjs
CHANGED
|
@@ -396,10 +396,253 @@ function useVolcanoTTS({
|
|
|
396
396
|
}
|
|
397
397
|
|
|
398
398
|
// src/tts/useMessageTTS.ts
|
|
399
|
+
import { useCallback as useCallback3, useEffect as useEffect2, useRef as useRef3, useState as useState3 } from "react";
|
|
400
|
+
|
|
401
|
+
// src/tts/StreamPlaybackManager.ts
|
|
399
402
|
import { WebsocketMSE as WebsocketMSE2 } from "@wq-hook/volcano-sdk/tts";
|
|
403
|
+
|
|
404
|
+
// src/tts/StreamingTextSplitter.ts
|
|
400
405
|
import { MarkdownFormatter as MarkdownFormatter2 } from "@wq-hook/volcano-sdk";
|
|
401
|
-
import { useCallback as useCallback3, useEffect as useEffect2, useRef as useRef3, useState as useState3 } from "react";
|
|
402
406
|
import emojiRegex2 from "emoji-regex";
|
|
407
|
+
/**
 * Incrementally splits streamed text into segments.
 *
 * Text arrives through `onChunk`. A segment is produced whenever a newline is
 * seen or the buffered text reaches `maxLength`; `complete()` flushes whatever
 * is left. Every produced segment is appended to `segments` and reported
 * through the optional `onSegmentComplete` callback.
 */
var StreamingTextSplitter = class {
  /**
   * @param {object} [options]
   * @param {number} [options.maxLength=150] - Buffer/segment length that triggers a forced split.
   * @param {number} [options.minLength=10] - A sentence ender is only used as a forced
   *   split point when its index is greater than this value.
   * @param {(segment: object) => void} [options.onSegmentComplete] - Called once per produced segment.
   * @param {(segments: object[]) => void} [options.onAllComplete] - Called once by `complete()`.
   */
  constructor(options = {}) {
    /** Text received but not yet turned into a segment. */
    this.buffer = "";
    /** Index assigned to the next produced segment. */
    this.segmentIndex = 0;
    /** Every segment produced so far. */
    this.segments = [];
    /** Set by `complete()`; later chunks are ignored. */
    this.isCompleted = false;
    this.maxLength = options.maxLength || 150;
    this.minLength = options.minLength || 10;
    this.onSegmentComplete = options.onSegmentComplete;
    this.onAllComplete = options.onAllComplete;
  }

  /**
   * Accepts one chunk of streamed text.
   * @param {string} chunk - Text chunk; empty chunks and chunks after completion are ignored.
   */
  onChunk(chunk) {
    if (!chunk || this.isCompleted) return;
    this.buffer += chunk;
    if (this.detectBoundary(chunk)) {
      // Drain every complete line in one pass. A leading newline must not stop
      // the drain, otherwise lines later in the same chunk stay buffered until
      // another newline arrives.
      this.drainCompleteLines();
    }
  }

  /**
   * Decides whether the latest chunk created a segment boundary. When the
   * buffer has reached `maxLength` it is force-split as a side effect.
   * @param {string} chunk - The most recently received chunk.
   * @returns {boolean} True when the chunk contains a newline or the buffer overflowed.
   */
  detectBoundary(chunk) {
    const hasNewline = chunk.includes("\n");
    const isOverflowing = this.buffer.length >= this.maxLength;
    if (isOverflowing) {
      this.forceSplitAtSentenceBoundary();
    }
    return hasNewline || isOverflowing;
  }

  /**
   * Splits an over-long buffer: the part up to and including the last sentence
   * ender is flushed as a segment, the rest stays buffered. When no sentence
   * ender lies beyond `minLength`, the buffer is cut at its midpoint instead.
   */
  forceSplitAtSentenceBoundary() {
    // Ideographic full stop plus ASCII and full-width question/exclamation marks.
    const sentenceEnders = "\u3002?!\uFF1F\uFF01";
    const content = this.buffer;

    // Scan backwards: the first hit is the last sentence ender in the buffer.
    let splitPoint = -1;
    for (let i = content.length - 1; i > this.minLength; i--) {
      if (sentenceEnders.includes(content[i])) {
        splitPoint = i + 1;
        break;
      }
    }

    if (splitPoint === -1) {
      splitPoint = Math.floor(content.length / 2);
      // Never cut between the two halves of a surrogate pair; that would leave
      // a lone surrogate at the end of one segment and the start of the next.
      const before = content.charCodeAt(splitPoint - 1);
      const after = content.charCodeAt(splitPoint);
      const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsPair) {
        splitPoint += 1;
      }
    }

    this.buffer = content.substring(0, splitPoint);
    this.flushSegment();
    this.buffer = content.substring(splitPoint);
  }

  /**
   * Removes every complete (newline-terminated) line from the buffer and emits
   * each non-empty one as segment(s). Text after the last newline stays buffered.
   */
  drainCompleteLines() {
    let newlineIndex = this.buffer.indexOf("\n");
    while (newlineIndex !== -1) {
      const line = this.buffer.substring(0, newlineIndex);
      this.buffer = this.buffer.substring(newlineIndex + 1);
      this.flushSegmentWithBuffer(line);
      newlineIndex = this.buffer.indexOf("\n");
    }
  }

  /**
   * Shared emit path: trims and filters the text, formats it, splits it when it
   * is longer than `maxLength`, then records and reports each resulting segment.
   * @param {string} rawContent - Unformatted text to turn into segment(s).
   */
  emitSegments(rawContent) {
    const content = rawContent.trim();
    if (!content) return;

    // Drop fragments of one or two characters that contain no letter or digit.
    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
    const isTooShort = content.length < 3;
    if (isPureSymbols && isTooShort) return;

    const formattedContent = MarkdownFormatter2.format(content).replace(emojiRegex2(), "");
    if (!formattedContent) return;

    const subSegments = formattedContent.length > this.maxLength
      ? this.splitLongSegment(formattedContent)
      : [formattedContent];

    for (const subSegment of subSegments) {
      if (!subSegment) continue;
      const segment = {
        index: this.segmentIndex++,
        content: subSegment,
        length: subSegment.length,
        sent: false
      };
      this.segments.push(segment);
      this.onSegmentComplete?.(segment);
    }
  }

  /**
   * Emits the given text as segment(s) without touching the internal buffer.
   * @param {string} bufferToFlush - Text to emit.
   */
  flushSegmentWithBuffer(bufferToFlush) {
    if (!bufferToFlush) return;
    this.emitSegments(bufferToFlush);
  }

  /**
   * Emits the current buffer as segment(s) and then clears it.
   */
  flushSegment() {
    this.emitSegments(this.buffer);
    this.buffer = "";
  }

  /**
   * Splits an over-long segment. A piece ends after each punctuation mark, or
   * as soon as it reaches `maxLength` when no punctuation occurs.
   * @param {string} segment - Text longer than `maxLength`.
   * @returns {string[]} The non-empty pieces, in order.
   */
  splitLongSegment(segment) {
    // Sentence enders and commas, in both ASCII and full-width forms.
    const splitMarks = "\u3002?!,\uFF1F\uFF01\uFF0C";
    const result = [];
    let current = "";
    // Iterate by code point so a surrogate pair is never separated.
    for (const char of segment) {
      current += char;
      const atPunctuation = splitMarks.includes(char);
      if ((atPunctuation && current.length <= this.maxLength) || current.length >= this.maxLength) {
        result.push(current);
        current = "";
      }
    }
    if (current) {
      result.push(current);
    }
    return result;
  }

  /**
   * Marks the stream as finished: emits all remaining buffered text and then
   * calls `onAllComplete` with the full segment list. Later calls are no-ops.
   */
  complete() {
    if (this.isCompleted) return;
    this.isCompleted = true;
    this.drainCompleteLines();
    if (this.buffer.trim()) {
      this.flushSegment();
    }
    this.onAllComplete?.(this.segments);
  }

  /**
   * Restores the splitter to its initial, empty state.
   */
  reset() {
    this.buffer = "";
    this.segmentIndex = 0;
    this.segments = [];
    this.isCompleted = false;
  }

  /**
   * @returns {string} The text that is currently buffered.
   */
  getBuffer() {
    return this.buffer;
  }

  /**
   * @returns {object[]} The segments produced so far (the live internal array).
   */
  getSegments() {
    return this.segments;
  }

  /**
   * @returns {{bufferLength: number, segmentCount: number, totalChars: number}}
   *   Buffer size, number of segments, and the summed length of all segments.
   */
  getStats() {
    return {
      bufferLength: this.buffer.length,
      segmentCount: this.segments.length,
      totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
    };
  }
};
|
|
642
|
+
|
|
643
|
+
// src/tts/StreamPlaybackManager.ts
|
|
644
|
+
import emojiRegex3 from "emoji-regex";
|
|
645
|
+
import { MarkdownFormatter as MarkdownFormatter3 } from "@wq-hook/volcano-sdk";
|
|
403
646
|
|
|
404
647
|
// src/tts/TextSplitter.ts
|
|
405
648
|
function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
@@ -469,338 +712,217 @@ function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
|
469
712
|
return segments;
|
|
470
713
|
}
|
|
471
714
|
|
|
472
|
-
// src/tts/
|
|
473
|
-
var NoopMetricsCollector = class {
|
|
474
|
-
record(_metric) {
|
|
475
|
-
}
|
|
476
|
-
};
|
|
477
|
-
|
|
478
|
-
// src/tts/useMessageTTS.ts
|
|
715
|
+
// src/tts/StreamPlaybackManager.ts
|
|
479
716
|
var WS_URL = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
|
|
480
|
-
var activeInstances = /* @__PURE__ */ new Map();
|
|
481
717
|
/**
 * Appends `params` to `url` as a percent-encoded query string.
 *
 * Both keys and values are encoded, so a key containing reserved characters
 * cannot corrupt the query string. Only the object's own enumerable properties
 * are used. The `?` separator is always appended, even when `params` is empty.
 *
 * @param {string} url - Base URL without a query string.
 * @param {Object<string, *>} [params] - Query parameters; values are stringified.
 * @returns {string} `url` followed by `?` and the `&`-joined `key=value` pairs.
 */
function buildFullUrl2(url, params) {
  const query = Object.entries(params ?? {})
    .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`)
    .join("&");
  return `${url}?${query}`;
}
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
728
|
+
var PlaybackSession = class {
|
|
729
|
+
// 标记是否正在停止,用于区分 stop() 和 pause()
|
|
730
|
+
constructor(id, config) {
|
|
731
|
+
this.listeners = /* @__PURE__ */ new Set();
|
|
732
|
+
this.audioContext = null;
|
|
733
|
+
this.analyser = null;
|
|
734
|
+
this.source = null;
|
|
735
|
+
this.audioUrl = null;
|
|
736
|
+
// TTS Resources
|
|
737
|
+
this.client = null;
|
|
738
|
+
this.splitter = null;
|
|
739
|
+
// Internal State
|
|
740
|
+
this.segmentQueue = [];
|
|
741
|
+
this.isSending = false;
|
|
742
|
+
this.isSessionStarting = false;
|
|
743
|
+
this.streamText = "";
|
|
744
|
+
this.sessionAudioBuffers = [];
|
|
745
|
+
this.isStreamFinished = false;
|
|
746
|
+
this.isSessionFinished = false;
|
|
747
|
+
this.resolveAllSegmentsSent = null;
|
|
748
|
+
this.animId = null;
|
|
749
|
+
this.lastVisUpdate = 0;
|
|
750
|
+
// Blob URL 管理状态
|
|
751
|
+
this.pausedTime = 0;
|
|
752
|
+
// 记录暂停时的播放位置
|
|
753
|
+
this.cachedAudioData = null;
|
|
754
|
+
// 缓存音频数据,用于恢复时重新创建 Blob URL
|
|
755
|
+
this.isStopping = false;
|
|
756
|
+
this.id = id;
|
|
757
|
+
this.config = config;
|
|
758
|
+
this.state = {
|
|
759
|
+
isPlaying: false,
|
|
760
|
+
isPaused: false,
|
|
761
|
+
isSynthesizing: false,
|
|
762
|
+
progress: 0,
|
|
763
|
+
visualizationData: {
|
|
764
|
+
frequencyData: new Uint8Array(0),
|
|
765
|
+
timeDomainData: new Uint8Array(0)
|
|
766
|
+
},
|
|
767
|
+
error: null,
|
|
768
|
+
isConnected: false,
|
|
769
|
+
isSessionStarted: false,
|
|
770
|
+
isStreamFinished: false
|
|
771
|
+
};
|
|
772
|
+
this.audio = new Audio();
|
|
773
|
+
this.audio.crossOrigin = "anonymous";
|
|
774
|
+
this.setupAudioListeners();
|
|
775
|
+
}
|
|
776
|
+
/**
|
|
777
|
+
* 初始化 AudioContext(用于可视化)
|
|
778
|
+
*/
|
|
779
|
+
initAudioContext() {
|
|
780
|
+
if (!this.audioContext) {
|
|
542
781
|
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
543
|
-
|
|
782
|
+
this.audioContext = new AudioContextClass();
|
|
544
783
|
}
|
|
545
|
-
if (
|
|
546
|
-
|
|
784
|
+
if (this.audioContext.state === "suspended") {
|
|
785
|
+
this.audioContext.resume();
|
|
547
786
|
}
|
|
548
|
-
if (!
|
|
549
|
-
|
|
550
|
-
|
|
787
|
+
if (!this.analyser && this.audioContext) {
|
|
788
|
+
this.analyser = this.audioContext.createAnalyser();
|
|
789
|
+
this.analyser.fftSize = this.config.visualization?.fftSize || 256;
|
|
551
790
|
}
|
|
552
|
-
if (!
|
|
791
|
+
if (!this.source && this.audioContext && this.analyser) {
|
|
553
792
|
try {
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
);
|
|
557
|
-
sourceRef.current.connect(analyserRef.current);
|
|
558
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
793
|
+
this.source = this.audioContext.createMediaElementSource(this.audio);
|
|
794
|
+
this.source.connect(this.analyser);
|
|
795
|
+
this.analyser.connect(this.audioContext.destination);
|
|
559
796
|
} catch (e) {
|
|
560
797
|
}
|
|
561
798
|
}
|
|
562
|
-
}
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
799
|
+
}
|
|
800
|
+
setupAudioListeners() {
|
|
801
|
+
this.audio.onplay = () => {
|
|
802
|
+
this.updateState({ isPlaying: true, isPaused: false });
|
|
803
|
+
this.config.onPlayStart?.();
|
|
804
|
+
this.initAudioContext();
|
|
805
|
+
this.startVisualizationLoop();
|
|
806
|
+
};
|
|
807
|
+
this.audio.onpause = () => {
|
|
808
|
+
if (this.isStopping) return;
|
|
809
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
810
|
+
this.config.onPlayPause?.();
|
|
811
|
+
};
|
|
812
|
+
this.audio.onended = () => {
|
|
813
|
+
this.updateState({
|
|
814
|
+
isPlaying: false,
|
|
815
|
+
isPaused: false,
|
|
816
|
+
isSynthesizing: false,
|
|
817
|
+
progress: 0,
|
|
818
|
+
visualizationData: {
|
|
819
|
+
frequencyData: new Uint8Array(0),
|
|
820
|
+
timeDomainData: new Uint8Array(0)
|
|
821
|
+
}
|
|
822
|
+
});
|
|
823
|
+
this.config.onPlayEnd?.();
|
|
824
|
+
this.releaseBlobUrl();
|
|
825
|
+
this.pausedTime = 0;
|
|
826
|
+
this.stopVisualizationLoop();
|
|
827
|
+
this.config.onSessionEnd?.(this.id);
|
|
828
|
+
};
|
|
829
|
+
this.audio.onerror = async (e) => {
|
|
830
|
+
const msg = this.audio.error?.message || "Audio playback error";
|
|
831
|
+
if (msg.includes("Empty src") || msg.includes("empty src")) {
|
|
832
|
+
console.log("[PlaybackSession] Ignoring empty src error during transition");
|
|
833
|
+
return;
|
|
582
834
|
}
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
}
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
onPlayPause?.();
|
|
612
|
-
}, [onPlayPause]);
|
|
613
|
-
const resume = useCallback3(() => {
|
|
614
|
-
stopOthers();
|
|
615
|
-
if (isFallbackRef.current) {
|
|
616
|
-
window.speechSynthesis.resume();
|
|
617
|
-
} else if (audioRef.current) {
|
|
618
|
-
audioRef.current.play();
|
|
619
|
-
}
|
|
620
|
-
setIsPaused(false);
|
|
621
|
-
setIsPlaying(true);
|
|
622
|
-
onPlayResume?.();
|
|
623
|
-
activeInstances.set(instanceId, { pause });
|
|
624
|
-
}, [stopOthers, instanceId, pause, onPlayResume]);
|
|
625
|
-
const togglePlay = useCallback3(() => {
|
|
626
|
-
if (isPlaying) {
|
|
627
|
-
pause();
|
|
628
|
-
} else {
|
|
629
|
-
resume();
|
|
630
|
-
}
|
|
631
|
-
}, [isPlaying, pause, resume]);
|
|
632
|
-
const playFallback = useCallback3(
|
|
633
|
-
(text) => {
|
|
634
|
-
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
635
|
-
isFallbackRef.current = true;
|
|
636
|
-
if (clientRef.current) {
|
|
637
|
-
clientRef.current.close();
|
|
638
|
-
clientRef.current = null;
|
|
835
|
+
console.error("[PlaybackSession] Audio error:", msg);
|
|
836
|
+
const isBlobUrlExpired = msg.includes("ERR_FILE_NOT_FOUND") || msg.includes("PIPELINE_ERROR_READ") || msg.includes("MEDIA_ELEMENT_ERROR") || this.audio.error?.code === MediaError.MEDIA_ERR_NETWORK || this.audio.error?.code === MediaError.MEDIA_ERR_SRC_NOT_SUPPORTED;
|
|
837
|
+
if (isBlobUrlExpired && this.cachedAudioData) {
|
|
838
|
+
console.warn(
|
|
839
|
+
"[PlaybackSession] Blob URL expired, attempting to recreate from cache"
|
|
840
|
+
);
|
|
841
|
+
this.releaseBlobUrl();
|
|
842
|
+
const blob = new Blob(this.cachedAudioData, { type: "audio/mpeg" });
|
|
843
|
+
this.audioUrl = URL.createObjectURL(blob);
|
|
844
|
+
this.audio.src = this.audioUrl;
|
|
845
|
+
const resumeTime = this.pausedTime || 0;
|
|
846
|
+
try {
|
|
847
|
+
await this.audio.play();
|
|
848
|
+
if (resumeTime > 0) {
|
|
849
|
+
this.audio.currentTime = resumeTime;
|
|
850
|
+
}
|
|
851
|
+
return;
|
|
852
|
+
} catch (playErr) {
|
|
853
|
+
console.error("[PlaybackSession] Failed to replay from cache:", playErr);
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
this.updateState({ error: msg });
|
|
857
|
+
this.config.onError?.(new Error(msg));
|
|
858
|
+
};
|
|
859
|
+
this.audio.ontimeupdate = () => {
|
|
860
|
+
let duration = this.audio.duration;
|
|
861
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
862
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
639
863
|
}
|
|
640
|
-
if (
|
|
641
|
-
|
|
642
|
-
|
|
864
|
+
if (isFinite(duration) && duration > 0) {
|
|
865
|
+
const progress = this.audio.currentTime / duration * 100;
|
|
866
|
+
this.updateState({ progress });
|
|
643
867
|
}
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
async (text, targetVoice) => {
|
|
675
|
-
stop();
|
|
676
|
-
stopOthers();
|
|
677
|
-
setErrorState(null);
|
|
678
|
-
setIsSynthesizing(true);
|
|
679
|
-
setProgress(0);
|
|
680
|
-
audioBuffersRef.current = [];
|
|
681
|
-
isFallbackRef.current = false;
|
|
682
|
-
const speed = audioParams?.speech_rate || 0;
|
|
683
|
-
const voice = targetVoice;
|
|
684
|
-
const cacheKey = TTSCache.generateKey(text, voice, speed);
|
|
685
|
-
cacheKeyRef.current = cacheKey;
|
|
686
|
-
const startTime = Date.now();
|
|
687
|
-
metricsCollector.record({
|
|
688
|
-
name: "tts_request",
|
|
689
|
-
labels: { voice, speed, text_length: text.length },
|
|
690
|
-
value: 1,
|
|
691
|
-
timestamp: startTime
|
|
692
|
-
});
|
|
693
|
-
try {
|
|
694
|
-
const cachedData = await TTSCache.get(cacheKey);
|
|
695
|
-
const audio = new Audio();
|
|
696
|
-
audio.crossOrigin = "anonymous";
|
|
697
|
-
audioRef.current = audio;
|
|
698
|
-
audio.onplay = () => {
|
|
699
|
-
setIsPlaying(true);
|
|
700
|
-
setIsPaused(false);
|
|
701
|
-
onPlayStart?.();
|
|
702
|
-
initAudioContext();
|
|
703
|
-
activeInstances.set(instanceId, { pause });
|
|
704
|
-
metricsCollector.record({
|
|
705
|
-
name: "tts_latency",
|
|
706
|
-
labels: { stage: "playback", voice, speed },
|
|
707
|
-
value: Date.now() - startTime,
|
|
708
|
-
timestamp: Date.now()
|
|
709
|
-
});
|
|
710
|
-
};
|
|
711
|
-
audio.onpause = () => {
|
|
712
|
-
if (!audio.ended) {
|
|
713
|
-
}
|
|
714
|
-
};
|
|
715
|
-
audio.onended = () => {
|
|
716
|
-
setIsPlaying(false);
|
|
717
|
-
setIsPaused(false);
|
|
718
|
-
onPlayEnd?.();
|
|
719
|
-
activeInstances.delete(instanceId);
|
|
720
|
-
};
|
|
721
|
-
audio.onerror = (e) => {
|
|
722
|
-
console.error("Audio playback error:", e, audio.error);
|
|
723
|
-
metricsCollector.record({
|
|
724
|
-
name: "tts_error",
|
|
725
|
-
labels: {
|
|
726
|
-
error_code: "playback_error",
|
|
727
|
-
voice,
|
|
728
|
-
detail: audio.error?.message || String(audio.error?.code)
|
|
729
|
-
},
|
|
730
|
-
value: 1,
|
|
731
|
-
timestamp: Date.now()
|
|
732
|
-
});
|
|
733
|
-
handleError(text, voice);
|
|
734
|
-
};
|
|
735
|
-
audio.ontimeupdate = () => {
|
|
736
|
-
let duration = audio.duration;
|
|
737
|
-
if (!isFinite(duration)) {
|
|
738
|
-
if (audio.buffered.length > 0) {
|
|
739
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
740
|
-
}
|
|
741
|
-
}
|
|
742
|
-
if (isFinite(duration) && duration > 0) {
|
|
743
|
-
setProgress(audio.currentTime / duration * 100);
|
|
744
|
-
}
|
|
745
|
-
};
|
|
746
|
-
if (cachedData) {
|
|
747
|
-
const totalSize = cachedData.reduce(
|
|
748
|
-
(acc, buf) => acc + buf.byteLength,
|
|
749
|
-
0
|
|
750
|
-
);
|
|
751
|
-
metricsCollector.record({
|
|
752
|
-
name: "tts_cache_hit",
|
|
753
|
-
labels: { voice, speed },
|
|
754
|
-
value: 1,
|
|
755
|
-
timestamp: Date.now()
|
|
756
|
-
});
|
|
757
|
-
console.log(
|
|
758
|
-
JSON.stringify({
|
|
759
|
-
event: "tts_cache_hit",
|
|
760
|
-
cache_hit: true,
|
|
761
|
-
text_len: text.length,
|
|
762
|
-
voice,
|
|
763
|
-
speed,
|
|
764
|
-
data_size: totalSize
|
|
765
|
-
})
|
|
766
|
-
);
|
|
767
|
-
if (totalSize === 0) {
|
|
768
|
-
console.warn(
|
|
769
|
-
"[useMessageTTS] Cached data is empty, falling back to stream"
|
|
770
|
-
);
|
|
771
|
-
} else {
|
|
772
|
-
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
773
|
-
const url2 = URL.createObjectURL(blob);
|
|
774
|
-
audioUrlRef.current = url2;
|
|
775
|
-
audio.src = url2;
|
|
776
|
-
setIsSynthesizing(false);
|
|
777
|
-
if (autoPlay) {
|
|
778
|
-
try {
|
|
779
|
-
await audio.play();
|
|
780
|
-
} catch (err) {
|
|
781
|
-
console.warn("AutoPlay blocked", err);
|
|
782
|
-
}
|
|
783
|
-
}
|
|
784
|
-
return;
|
|
785
|
-
}
|
|
868
|
+
};
|
|
869
|
+
}
|
|
870
|
+
/**
|
|
871
|
+
* 建立 WebSocket 连接
|
|
872
|
+
*/
|
|
873
|
+
async connect() {
|
|
874
|
+
if (this.state.isConnected) return;
|
|
875
|
+
this.updateState({
|
|
876
|
+
error: null,
|
|
877
|
+
progress: 0,
|
|
878
|
+
isSynthesizing: false,
|
|
879
|
+
isConnected: false,
|
|
880
|
+
isSessionStarted: false
|
|
881
|
+
});
|
|
882
|
+
this.streamText = "";
|
|
883
|
+
this.segmentQueue = [];
|
|
884
|
+
this.sessionAudioBuffers = [];
|
|
885
|
+
this.isStreamFinished = false;
|
|
886
|
+
this.isSessionFinished = false;
|
|
887
|
+
this.isSessionStarting = false;
|
|
888
|
+
if (this.client) {
|
|
889
|
+
this.client.close();
|
|
890
|
+
this.client = null;
|
|
891
|
+
}
|
|
892
|
+
this.splitter = new StreamingTextSplitter({
|
|
893
|
+
maxLength: this.config.maxSegmentLength || 150,
|
|
894
|
+
onSegmentComplete: (segment) => {
|
|
895
|
+
this.segmentQueue.push(segment);
|
|
896
|
+
if (this.state.isSessionStarted) {
|
|
897
|
+
this.processQueue();
|
|
786
898
|
}
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
899
|
+
}
|
|
900
|
+
});
|
|
901
|
+
this.client = WebsocketMSE2({ autoStartSession: false });
|
|
902
|
+
const { ttsConfig, audioParams } = this.config;
|
|
903
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
904
|
+
const startTime = Date.now();
|
|
905
|
+
this.config.metricsCollector?.record({
|
|
906
|
+
name: "tts_request",
|
|
907
|
+
labels: { voice, text_length: 0 },
|
|
908
|
+
value: 1,
|
|
909
|
+
timestamp: startTime
|
|
910
|
+
});
|
|
911
|
+
return new Promise((resolve, reject) => {
|
|
912
|
+
const timeoutId = setTimeout(() => {
|
|
913
|
+
const err = new Error("WebSocket connection timeout (10s)");
|
|
914
|
+
this.updateState({ error: err.message });
|
|
915
|
+
reject(err);
|
|
916
|
+
}, 1e4);
|
|
917
|
+
try {
|
|
918
|
+
const url = this.client.start({
|
|
795
919
|
url: buildFullUrl2(WS_URL, {
|
|
796
920
|
api_access_key: `Jwt; ${ttsConfig.token}`,
|
|
797
921
|
api_app_key: ttsConfig.appid,
|
|
798
922
|
api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
|
|
799
923
|
}),
|
|
800
924
|
config: {
|
|
801
|
-
user: {
|
|
802
|
-
uid: `req-${Date.now()}`
|
|
803
|
-
},
|
|
925
|
+
user: { uid: `req-${Date.now()}` },
|
|
804
926
|
namespace: ttsConfig.namespace || "BidirectionalTTS",
|
|
805
927
|
req_params: {
|
|
806
928
|
speaker: voice,
|
|
@@ -815,455 +937,722 @@ function useMessageTTS({
|
|
|
815
937
|
enable_language_detector: true,
|
|
816
938
|
disable_markdown_filter: true,
|
|
817
939
|
enable_latex_tn: true
|
|
818
|
-
// max_length_to_filter_parenthesis: 100,
|
|
819
940
|
})
|
|
820
941
|
}
|
|
821
942
|
},
|
|
943
|
+
onStart: () => {
|
|
944
|
+
this.updateState({ isConnected: true });
|
|
945
|
+
},
|
|
946
|
+
onConnectionReady: () => {
|
|
947
|
+
clearTimeout(timeoutId);
|
|
948
|
+
resolve();
|
|
949
|
+
},
|
|
822
950
|
onSessionStarted: () => {
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
951
|
+
this.updateState({ isSessionStarted: true });
|
|
952
|
+
this.isSessionStarting = false;
|
|
953
|
+
if (this.segmentQueue.length > 0) {
|
|
954
|
+
this.processQueue();
|
|
955
|
+
}
|
|
827
956
|
},
|
|
828
957
|
onMessage: (data) => {
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
);
|
|
958
|
+
this.updateState({ isSynthesizing: true });
|
|
959
|
+
if (this.sessionAudioBuffers.length === 0) {
|
|
960
|
+
this.config.metricsCollector?.record({
|
|
961
|
+
name: "tts_latency",
|
|
962
|
+
labels: { stage: "first_packet", voice },
|
|
963
|
+
value: Date.now() - startTime,
|
|
964
|
+
timestamp: Date.now()
|
|
965
|
+
});
|
|
837
966
|
}
|
|
838
967
|
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
839
|
-
|
|
968
|
+
this.sessionAudioBuffers.push(buffer);
|
|
840
969
|
},
|
|
841
970
|
onSessionFinished: () => {
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
971
|
+
this.updateState({
|
|
972
|
+
isSynthesizing: false,
|
|
973
|
+
isSessionStarted: false
|
|
974
|
+
});
|
|
975
|
+
if (this.sessionAudioBuffers.length > 0) {
|
|
976
|
+
this.cachedAudioData = [...this.sessionAudioBuffers];
|
|
845
977
|
}
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
text_len: text.length,
|
|
851
|
-
duration_ms: Date.now() - startTime,
|
|
978
|
+
if (this.sessionAudioBuffers.length > 0 && this.streamText) {
|
|
979
|
+
const speed = audioParams?.speech_rate || 0;
|
|
980
|
+
const cacheKey = TTSCache.generateKey(
|
|
981
|
+
this.streamText,
|
|
852
982
|
voice,
|
|
853
983
|
speed
|
|
854
|
-
|
|
855
|
-
|
|
984
|
+
);
|
|
985
|
+
TTSCache.set(cacheKey, [...this.sessionAudioBuffers]);
|
|
986
|
+
}
|
|
987
|
+
this.config.metricsCollector?.record({
|
|
988
|
+
name: "tts_synthesis_finished",
|
|
989
|
+
labels: { voice, text_length: this.streamText.length },
|
|
990
|
+
value: Date.now() - startTime,
|
|
991
|
+
timestamp: Date.now()
|
|
992
|
+
});
|
|
856
993
|
},
|
|
857
994
|
onError: (err) => {
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
995
|
+
if (!this.state.isConnected) {
|
|
996
|
+
clearTimeout(timeoutId);
|
|
997
|
+
reject(new Error(err.msg || "TTS error"));
|
|
998
|
+
}
|
|
999
|
+
console.error("[PlaybackSession] TTS error:", err);
|
|
1000
|
+
this.updateState({
|
|
1001
|
+
error: err.msg || "TTS error",
|
|
1002
|
+
isSynthesizing: false
|
|
864
1003
|
});
|
|
865
|
-
|
|
866
|
-
|
|
1004
|
+
this.config.onError?.(new Error(err.msg || "TTS error"));
|
|
1005
|
+
},
|
|
1006
|
+
onWSError: (err) => {
|
|
1007
|
+
if (!this.state.isConnected) {
|
|
1008
|
+
clearTimeout(timeoutId);
|
|
1009
|
+
reject(err instanceof Error ? err : new Error("WebSocket error"));
|
|
1010
|
+
}
|
|
867
1011
|
}
|
|
868
1012
|
});
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
1013
|
+
if (this.audioUrl) {
|
|
1014
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1015
|
+
}
|
|
1016
|
+
this.audioUrl = url;
|
|
1017
|
+
this.audio.src = url;
|
|
1018
|
+
if (this.config.autoPlay !== false) {
|
|
1019
|
+
this.audio.play().catch(
|
|
1020
|
+
(e) => console.warn("[PlaybackSession] Autoplay blocked:", e)
|
|
1021
|
+
);
|
|
877
1022
|
}
|
|
878
1023
|
} catch (err) {
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
timestamp: Date.now()
|
|
885
|
-
});
|
|
886
|
-
handleError(text, voice);
|
|
887
|
-
}
|
|
888
|
-
},
|
|
889
|
-
[
|
|
890
|
-
ttsConfig,
|
|
891
|
-
audioParams,
|
|
892
|
-
autoPlay,
|
|
893
|
-
stop,
|
|
894
|
-
stopOthers,
|
|
895
|
-
instanceId,
|
|
896
|
-
onPlayStart,
|
|
897
|
-
onPlayEnd,
|
|
898
|
-
initAudioContext,
|
|
899
|
-
pause,
|
|
900
|
-
fallbackVoice,
|
|
901
|
-
metricsCollector
|
|
902
|
-
]
|
|
903
|
-
);
|
|
904
|
-
const handleError = useCallback3(
|
|
905
|
-
(text, failedVoice) => {
|
|
906
|
-
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
907
|
-
console.warn(
|
|
908
|
-
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
1024
|
+
clearTimeout(timeoutId);
|
|
1025
|
+
console.error("[PlaybackSession] Connect error:", err);
|
|
1026
|
+
this.updateState({ error: String(err) });
|
|
1027
|
+
this.config.onError?.(
|
|
1028
|
+
err instanceof Error ? err : new Error(String(err))
|
|
909
1029
|
);
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
1030
|
+
reject(err);
|
|
1031
|
+
}
|
|
1032
|
+
});
|
|
1033
|
+
}
|
|
1034
|
+
/**
|
|
1035
|
+
* 发送流式文本
|
|
1036
|
+
*/
|
|
1037
|
+
handleStreamChunk(chunk) {
|
|
1038
|
+
if (!chunk) return;
|
|
1039
|
+
this.streamText += chunk;
|
|
1040
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting && this.client && this.state.isConnected && !this.isSessionFinished) {
|
|
1041
|
+
this.isSessionStarting = true;
|
|
1042
|
+
this.client.startSession();
|
|
1043
|
+
}
|
|
1044
|
+
this.splitter?.onChunk(chunk);
|
|
1045
|
+
if (this.state.isSessionStarted) {
|
|
1046
|
+
this.processQueue();
|
|
1047
|
+
}
|
|
1048
|
+
}
|
|
1049
|
+
/**
|
|
1050
|
+
* 结束流式输入
|
|
1051
|
+
*/
|
|
1052
|
+
async finishStream() {
|
|
1053
|
+
this.isStreamFinished = true;
|
|
1054
|
+
this.updateState({ isStreamFinished: true });
|
|
1055
|
+
this.splitter?.complete();
|
|
1056
|
+
if (this.state.isSessionStarted) {
|
|
1057
|
+
this.processQueue();
|
|
1058
|
+
}
|
|
1059
|
+
if (this.segmentQueue.length > 0 || this.isSending) {
|
|
1060
|
+
await new Promise((resolve) => {
|
|
1061
|
+
this.resolveAllSegmentsSent = resolve;
|
|
1062
|
+
});
|
|
1063
|
+
} else if (this.client && this.state.isSessionStarted && !this.isSessionFinished) {
|
|
1064
|
+
this.isSessionFinished = true;
|
|
1065
|
+
this.client.finishSession();
|
|
1066
|
+
}
|
|
1067
|
+
}
|
|
1068
|
+
/**
|
|
1069
|
+
* 处理非流式播放(直接播放整段文本)
|
|
1070
|
+
*/
|
|
1071
|
+
async play(text) {
|
|
1072
|
+
const formattedText = MarkdownFormatter3.format(text).replace(
|
|
1073
|
+
emojiRegex3(),
|
|
1074
|
+
""
|
|
1075
|
+
);
|
|
1076
|
+
const { audioParams } = this.config;
|
|
1077
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
1078
|
+
const speed = audioParams?.speech_rate || 0;
|
|
1079
|
+
const cacheKey = TTSCache.generateKey(formattedText, voice, speed);
|
|
1080
|
+
const cachedData = await TTSCache.get(cacheKey);
|
|
1081
|
+
if (cachedData && cachedData.length > 0) {
|
|
1082
|
+
this.cachedAudioData = cachedData;
|
|
1083
|
+
this.releaseBlobUrl();
|
|
1084
|
+
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
1085
|
+
this.audioUrl = URL.createObjectURL(blob);
|
|
1086
|
+
this.audio.src = this.audioUrl;
|
|
1087
|
+
this.pausedTime = 0;
|
|
1088
|
+
this.updateState({ isSynthesizing: false });
|
|
1089
|
+
if (this.config.autoPlay !== false) {
|
|
1090
|
+
try {
|
|
1091
|
+
await this.audio.play();
|
|
1092
|
+
} catch (e) {
|
|
1093
|
+
console.warn("Autoplay blocked", e);
|
|
917
1094
|
}
|
|
918
|
-
executeTTS(text, fallbackVoice);
|
|
919
|
-
} else {
|
|
920
|
-
playFallback(text);
|
|
921
1095
|
}
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
(
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
1096
|
+
return;
|
|
1097
|
+
}
|
|
1098
|
+
await this.connect();
|
|
1099
|
+
this.streamText = formattedText;
|
|
1100
|
+
const segments = splitTextByDelimiters(formattedText);
|
|
1101
|
+
if (this.state.isConnected) {
|
|
1102
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting) {
|
|
1103
|
+
this.isSessionStarting = true;
|
|
1104
|
+
this.client?.startSession();
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
segments.forEach((seg, idx) => {
|
|
1108
|
+
this.segmentQueue.push({
|
|
1109
|
+
index: idx,
|
|
1110
|
+
content: seg.content,
|
|
1111
|
+
length: seg.content.length,
|
|
1112
|
+
sent: false
|
|
1113
|
+
});
|
|
1114
|
+
});
|
|
1115
|
+
if (this.state.isSessionStarted) {
|
|
1116
|
+
this.processQueue();
|
|
1117
|
+
}
|
|
1118
|
+
await this.finishStream();
|
|
1119
|
+
}
|
|
1120
|
+
processQueue() {
|
|
1121
|
+
if (!this.client || !this.state.isSessionStarted || this.isSending || this.isSessionFinished) {
|
|
1122
|
+
return;
|
|
1123
|
+
}
|
|
1124
|
+
if (this.segmentQueue.length === 0) {
|
|
1125
|
+
if (this.isStreamFinished && !this.isSessionFinished) {
|
|
1126
|
+
this.isSessionFinished = true;
|
|
1127
|
+
this.client.finishSession();
|
|
1128
|
+
this.resolveAllSegmentsSent?.();
|
|
1129
|
+
}
|
|
1130
|
+
return;
|
|
1131
|
+
}
|
|
1132
|
+
this.isSending = true;
|
|
1133
|
+
const segment = this.segmentQueue.shift();
|
|
1134
|
+
this.client.sendText(segment.content);
|
|
1135
|
+
segment.sent = true;
|
|
1136
|
+
this.isSending = false;
|
|
1137
|
+
setTimeout(() => this.processQueue(), 0);
|
|
1138
|
+
}
|
|
1139
|
+
pause() {
|
|
1140
|
+
if (this.isStopping) return;
|
|
1141
|
+
this.pausedTime = this.audio.currentTime;
|
|
1142
|
+
this.audio.pause();
|
|
1143
|
+
this.releaseBlobUrl();
|
|
1144
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
1145
|
+
}
|
|
1146
|
+
async resume() {
|
|
1147
|
+
if (!this.audioUrl && this.cachedAudioData) {
|
|
1148
|
+
const blob = new Blob(this.cachedAudioData, { type: "audio/mpeg" });
|
|
1149
|
+
this.audioUrl = URL.createObjectURL(blob);
|
|
1150
|
+
this.audio.src = this.audioUrl;
|
|
1151
|
+
await new Promise((resolve, reject) => {
|
|
1152
|
+
const onLoaded = () => {
|
|
1153
|
+
resolve();
|
|
1154
|
+
this.audio.removeEventListener("loadedmetadata", onLoaded);
|
|
1155
|
+
this.audio.removeEventListener("error", onError);
|
|
1156
|
+
};
|
|
1157
|
+
const onError = () => {
|
|
1158
|
+
reject(new Error("Failed to load audio"));
|
|
1159
|
+
this.audio.removeEventListener("loadedmetadata", onLoaded);
|
|
1160
|
+
this.audio.removeEventListener("error", onError);
|
|
1161
|
+
};
|
|
1162
|
+
this.audio.addEventListener("loadedmetadata", onLoaded);
|
|
1163
|
+
this.audio.addEventListener("error", onError);
|
|
1164
|
+
setTimeout(() => {
|
|
1165
|
+
this.audio.removeEventListener("loadedmetadata", onLoaded);
|
|
1166
|
+
this.audio.removeEventListener("error", onError);
|
|
1167
|
+
resolve();
|
|
1168
|
+
}, 3e3);
|
|
1169
|
+
});
|
|
1170
|
+
this.audio.currentTime = this.pausedTime;
|
|
1171
|
+
}
|
|
1172
|
+
await this.audio.play();
|
|
1173
|
+
this.updateState({ isPaused: false, isPlaying: true });
|
|
1174
|
+
}
|
|
1175
|
+
stop() {
|
|
1176
|
+
this.isStopping = true;
|
|
1177
|
+
if (this.client) {
|
|
1178
|
+
this.client.close();
|
|
1179
|
+
this.client = null;
|
|
1180
|
+
}
|
|
1181
|
+
this.audio.pause();
|
|
1182
|
+
this.audio.currentTime = 0;
|
|
1183
|
+
this.releaseBlobUrl();
|
|
1184
|
+
this.cachedAudioData = null;
|
|
1185
|
+
this.pausedTime = 0;
|
|
1186
|
+
this.stopVisualizationLoop();
|
|
1187
|
+
this.audioContext?.close();
|
|
1188
|
+
this.audioContext = null;
|
|
1189
|
+
this.updateState({
|
|
1190
|
+
isPlaying: false,
|
|
1191
|
+
isPaused: false,
|
|
1192
|
+
isSynthesizing: false,
|
|
1193
|
+
progress: 0,
|
|
1194
|
+
isConnected: false,
|
|
1195
|
+
isSessionStarted: false,
|
|
1196
|
+
// 清除可视化数据
|
|
1197
|
+
visualizationData: {
|
|
1198
|
+
frequencyData: new Uint8Array(0),
|
|
1199
|
+
timeDomainData: new Uint8Array(0)
|
|
1200
|
+
}
|
|
1201
|
+
});
|
|
1202
|
+
this.isStopping = false;
|
|
1203
|
+
}
|
|
1204
|
+
seek(percentage) {
|
|
1205
|
+
let duration = this.audio.duration;
|
|
1206
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
1207
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
1208
|
+
}
|
|
1209
|
+
if (isFinite(duration) && duration > 0) {
|
|
1210
|
+
const time = percentage / 100 * duration;
|
|
1211
|
+
if (isFinite(time)) {
|
|
1212
|
+
this.audio.currentTime = time;
|
|
1213
|
+
this.updateState({ progress: percentage });
|
|
1214
|
+
}
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
updateState(partial) {
|
|
1218
|
+
this.state = { ...this.state, ...partial };
|
|
1219
|
+
this.notifyListeners();
|
|
1220
|
+
}
|
|
1221
|
+
subscribe(listener) {
|
|
1222
|
+
this.listeners.add(listener);
|
|
1223
|
+
listener(this.state);
|
|
1224
|
+
return () => this.listeners.delete(listener);
|
|
1225
|
+
}
|
|
1226
|
+
notifyListeners() {
|
|
1227
|
+
this.listeners.forEach((l) => l(this.state));
|
|
1228
|
+
}
|
|
1229
|
+
// Visualization
|
|
1230
|
+
getFrequencyData() {
|
|
1231
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1232
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1233
|
+
this.analyser.getByteFrequencyData(data);
|
|
1234
|
+
return data;
|
|
1235
|
+
}
|
|
1236
|
+
getTimeDomainData() {
|
|
1237
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1238
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1239
|
+
this.analyser.getByteTimeDomainData(data);
|
|
1240
|
+
return data;
|
|
1241
|
+
}
|
|
1242
|
+
startVisualizationLoop() {
|
|
1243
|
+
if (!this.config.visualization?.enabled) return;
|
|
949
1244
|
const update = (timestamp) => {
|
|
950
|
-
if (isPlaying && !isPaused) {
|
|
951
|
-
if (timestamp -
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
1245
|
+
if (this.state.isPlaying && !this.state.isPaused) {
|
|
1246
|
+
if (timestamp - this.lastVisUpdate >= (this.config.visualization?.refreshInterval || 0)) {
|
|
1247
|
+
this.updateState({
|
|
1248
|
+
visualizationData: {
|
|
1249
|
+
frequencyData: this.getFrequencyData(),
|
|
1250
|
+
timeDomainData: this.getTimeDomainData()
|
|
1251
|
+
}
|
|
955
1252
|
});
|
|
956
|
-
|
|
1253
|
+
this.lastVisUpdate = timestamp;
|
|
957
1254
|
}
|
|
958
|
-
animId = requestAnimationFrame(update);
|
|
1255
|
+
this.animId = requestAnimationFrame(update);
|
|
959
1256
|
}
|
|
960
1257
|
};
|
|
961
|
-
|
|
962
|
-
|
|
1258
|
+
this.animId = requestAnimationFrame(update);
|
|
1259
|
+
}
|
|
1260
|
+
stopVisualizationLoop() {
|
|
1261
|
+
if (this.animId) {
|
|
1262
|
+
cancelAnimationFrame(this.animId);
|
|
1263
|
+
this.animId = null;
|
|
963
1264
|
}
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
1265
|
+
}
|
|
1266
|
+
/**
|
|
1267
|
+
* 释放 Blob URL 资源
|
|
1268
|
+
* 在暂停、停止、播放完毕时调用,避免 Blob URL 长期占用内存和过期问题
|
|
1269
|
+
*/
|
|
1270
|
+
releaseBlobUrl() {
|
|
1271
|
+
if (this.audioUrl) {
|
|
1272
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1273
|
+
this.audioUrl = null;
|
|
1274
|
+
}
|
|
1275
|
+
this.audio.src = "";
|
|
1276
|
+
this.audio.load();
|
|
1277
|
+
}
|
|
1278
|
+
};
|
|
1279
|
+
/**
 * Registry of playback sessions keyed by stream id.
 *
 * Tracks which session is currently active and pauses it when another
 * session is created or resumed.
 */
var StreamPlaybackManagerImpl = class {
  constructor() {
    /** Sessions keyed by stream id. */
    this.sessions = /* @__PURE__ */ new Map();
    /** Id of the session most recently created or resumed, or null. */
    this.activeStreamId = null;
  }
  /**
   * Create a new playback session and make it the active one.
   *
   * A different active session is paused first when it is playing or
   * paused. The caller's `onSessionEnd` is wrapped so the active id is
   * cleared when this session ends.
   *
   * @param {string} id - Stream id used as the registry key.
   * @param {object} config - Session configuration passed to PlaybackSession.
   * @returns {PlaybackSession} The newly created session.
   */
  createSession(id, config) {
    // Fix: a session already registered under this id used to be overwritten
    // in the map without being stopped, leaving it unreachable with its
    // client and audio still alive. Stop it before replacing it.
    if (this.sessions.has(id)) {
      this.stop(id);
    }
    if (this.activeStreamId && this.activeStreamId !== id) {
      const activeSession = this.sessions.get(this.activeStreamId);
      if (activeSession) {
        const isPlaying = activeSession.state.isPlaying;
        const isPaused = activeSession.state.isPaused;
        console.log(`[StreamPlaybackManager] Checking active session ${this.activeStreamId}: isPlaying=${isPlaying}, isPaused=${isPaused}`);
        if (isPlaying || isPaused) {
          console.log(`[StreamPlaybackManager] Pausing active session ${this.activeStreamId}`);
          this.pause(this.activeStreamId);
        } else {
          console.log(`[StreamPlaybackManager] Active session ${this.activeStreamId} is not playing/paused, skipping pause`);
        }
      }
    }
    const session = new PlaybackSession(id, {
      ...config,
      onSessionEnd: (sessionId) => {
        if (this.activeStreamId === sessionId) {
          this.activeStreamId = null;
        }
        config?.onSessionEnd?.(sessionId);
      }
    });
    this.sessions.set(id, session);
    this.activeStreamId = id;
    return session;
  }
  /**
   * Look up a session.
   *
   * @param {string} id - Stream id.
   * @returns {PlaybackSession | undefined} The session, if registered.
   */
  getSession(id) {
    return this.sessions.get(id);
  }
  /**
   * Stop a session and remove it from the registry.
   *
   * @param {string} id - Stream id; unknown ids are ignored.
   */
  stop(id) {
    const session = this.sessions.get(id);
    if (!session) {
      return;
    }
    session.stop();
    this.sessions.delete(id);
    if (this.activeStreamId === id) {
      this.activeStreamId = null;
    }
  }
  /**
   * Pause a session.
   *
   * @param {string} id - Stream id; unknown ids are ignored.
   */
  pause(id) {
    this.sessions.get(id)?.pause();
  }
  /**
   * Resume a session and make it the active one, pausing the previously
   * active session first.
   *
   * @param {string} id - Stream id; unknown ids are ignored.
   */
  resume(id) {
    const session = this.sessions.get(id);
    // Fix: an unknown id used to pause the active session and then become
    // the "active" id itself; now it is a no-op.
    if (!session) {
      return;
    }
    if (this.activeStreamId && this.activeStreamId !== id) {
      this.pause(this.activeStreamId);
    }
    // Fix: session.resume() is asynchronous; observe the result so a
    // rejection is logged instead of becoming an unhandled rejection.
    Promise.resolve(session.resume()).catch((err) => {
      console.error(`[StreamPlaybackManager] Failed to resume session ${id}:`, err);
    });
    this.activeStreamId = id;
  }
  // Note carried over from the original source: a legacy "register" API
  // could be kept here as an alias for the old useMessageTTS logic, but
  // createSession is the recommended entry point and, since the hook is
  // being refactored, no alias is defined.
};
var StreamPlaybackManager = new StreamPlaybackManagerImpl();
|
|
1356
|
+
|
|
1357
|
+
// src/tts/Metrics.ts
|
|
1358
|
+
/** Metrics collector that discards every metric; used as the default. */
var NoopMetricsCollector = class {
  /**
   * Accept a metric and do nothing with it.
   *
   * @param {unknown} _metric - Ignored.
   */
  record(_metric) {
    return;
  }
};
|
|
1362
|
+
|
|
1363
|
+
// src/tts/useMessageTTS.ts
|
|
1364
|
+
/**
 * React hook that plays a message through a managed TTS playback session.
 *
 * With an external `streamId` the hook subscribes to that session
 * ("subscription mode") and switches to its own independent session when
 * `play()` is called while the stream session is missing, finished or
 * inactive. On a session error it retries once with `fallbackVoice` (when
 * set and different from the failed voice) and otherwise falls back to the
 * browser's `speechSynthesis`.
 *
 * @param {object} options
 * @param {object} options.ttsConfig - Passed through to the playback session.
 * @param {object} [options.audioParams] - Passed through to the session;
 *   `speaker` and `speech_rate` are also read here.
 * @param {boolean} [options.autoPlay=true] - Passed through to the session.
 * @param {object} [options.metricsCollector] - Passed through to the
 *   session; defaults to a NoopMetricsCollector.
 * @param {Function} [options.onPlayStart]
 * @param {Function} [options.onPlayPause]
 * @param {Function} [options.onPlayResume]
 * @param {Function} [options.onPlayEnd]
 * @param {Function} [options.onStop] - Called at the end of every `stop()`.
 * @param {Function} [options.onError] - Called when the speechSynthesis
 *   fallback itself fails.
 * @param {string} [options.fallbackVoice] - Voice used for a single retry.
 * @param {object} [options.visualization] - Passed through to the session.
 * @param {string} [options.streamId] - External session id to subscribe to.
 * @returns {object} Playback state flags, `error`, the control functions
 *   (`play`, `pause`, `resume`, `stop`, `togglePlay`, `seek`), visualization
 *   accessors, `isStreamActive`, `streamState` and `canResume`.
 */
function useMessageTTS({
  ttsConfig,
  audioParams,
  autoPlay = true,
  metricsCollector = new NoopMetricsCollector(),
  onPlayStart,
  onPlayPause,
  onPlayResume,
  onPlayEnd,
  onStop,
  onError,
  fallbackVoice,
  visualization,
  streamId: externalStreamId
}) {
  const isSubscriptionMode = !!externalStreamId;
  const [internalStreamId, setInternalStreamId] = useState3("");
  const [isSwitchedToIndependent, setIsSwitchedToIndependent] = useState3(false);
  // Once switched to independent mode the hook follows its own session id.
  const streamId = isSwitchedToIndependent ? internalStreamId : externalStreamId || internalStreamId;
  const [state, setState] = useState3({
    isPlaying: false,
    isPaused: false,
    isSynthesizing: false,
    progress: 0,
    visualizationData: {
      frequencyData: new Uint8Array(0),
      timeDomainData: new Uint8Array(0)
    },
    error: null,
    isConnected: false,
    isSessionStarted: false,
    isStreamFinished: false
  });
  const [error, setErrorState] = useState3(null);
  const isFallbackRef = useRef3(false);
  const fallbackUtteranceRef = useRef3(null);
  const currentTextRef = useRef3("");

  // Mirror the state of the session identified by streamId into hook state.
  useEffect2(() => {
    if (!streamId) return;
    const session = StreamPlaybackManager.getSession(streamId);
    if (session) {
      const unsubscribe = session.subscribe((newState) => {
        setState(newState);
        if (newState.error) setErrorState(newState.error);
      });
      return () => {
        unsubscribe();
      };
    }
  }, [streamId]);

  const stop = useCallback3(() => {
    if (streamId) {
      StreamPlaybackManager.stop(streamId);
      if (!isSubscriptionMode || isSwitchedToIndependent) {
        setInternalStreamId("");
        setIsSwitchedToIndependent(false);
      }
    }
    if (fallbackUtteranceRef.current) {
      window.speechSynthesis.cancel();
      fallbackUtteranceRef.current = null;
    }
    isFallbackRef.current = false;
    setState((prev) => ({
      ...prev,
      isPlaying: false,
      isPaused: false,
      isSynthesizing: false,
      progress: 0
    }));
    onStop?.();
  }, [streamId, isSubscriptionMode, isSwitchedToIndependent, onStop]);

  const pause = useCallback3(() => {
    if (isFallbackRef.current) {
      window.speechSynthesis.pause();
      setState((prev) => ({ ...prev, isPaused: true, isPlaying: false }));
      onPlayPause?.();
    } else if (streamId) {
      StreamPlaybackManager.pause(streamId);
    }
  }, [streamId, onPlayPause]);

  const resume = useCallback3(() => {
    if (isFallbackRef.current) {
      window.speechSynthesis.resume();
      setState((prev) => ({ ...prev, isPaused: false, isPlaying: true }));
      onPlayResume?.();
    } else if (streamId) {
      const session = StreamPlaybackManager.getSession(streamId);
      if (session) {
        StreamPlaybackManager.resume(streamId);
      } else {
        console.log(
          "[useMessageTTS] Session not found, resetting pause state"
        );
        setState((prev) => ({ ...prev, isPaused: false, isPlaying: false }));
      }
    }
  }, [streamId, onPlayResume]);

  const togglePlay = useCallback3(() => {
    if (state.isPlaying) {
      pause();
    } else {
      resume();
    }
  }, [state.isPlaying, pause, resume]);

  // Last-resort playback through the browser's speechSynthesis.
  const playFallback = useCallback3(
    (text) => {
      console.warn("[useMessageTTS] Switching to fallback TTS");
      stop();
      isFallbackRef.current = true;
      setErrorState(null);
      const utterance = new SpeechSynthesisUtterance(text);
      // NOTE(review): speech_rate is handed to the utterance unchanged —
      // confirm its scale matches SpeechSynthesisUtterance.rate.
      utterance.rate = audioParams?.speech_rate || 1;
      const voices = window.speechSynthesis.getVoices();
      const zhVoice = voices.find((v) => v.lang.includes("zh"));
      if (zhVoice) utterance.voice = zhVoice;
      utterance.onstart = () => {
        setState((prev) => ({ ...prev, isPlaying: true, isPaused: false }));
        onPlayStart?.();
      };
      utterance.onend = () => {
        setState((prev) => ({
          ...prev,
          isPlaying: false,
          isPaused: false,
          progress: 100
        }));
        onPlayEnd?.();
      };
      utterance.onerror = (e) => {
        // Fix: speechSynthesis.cancel() (issued by stop()) is reported as a
        // "canceled"/"interrupted" error event; that is not a failure.
        if (e?.error === "canceled" || e?.error === "interrupted") {
          return;
        }
        console.error("[useMessageTTS] Fallback TTS failed", e);
        setErrorState("Fallback TTS failed");
        onError?.(new Error("Fallback TTS failed"));
      };
      fallbackUtteranceRef.current = utterance;
      window.speechSynthesis.speak(utterance);
    },
    [audioParams, onError, onPlayEnd, onPlayStart, stop]
  );

  // Session error handling: retry once with the fallback voice, otherwise
  // use speechSynthesis. `failedStreamId` is the id of the session that
  // reported the error.
  const handleError = useCallback3(
    (text, failedVoice, failedStreamId) => {
      // Fix: the failed session used to stay registered and un-stopped.
      if (failedStreamId) {
        StreamPlaybackManager.stop(failedStreamId);
      }
      if (fallbackVoice && failedVoice !== fallbackVoice) {
        console.warn(
          `[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
        );
        // Fix: always use a fresh id instead of reusing a (possibly stale)
        // internal id, so the subscription effect re-runs for the retry.
        const newId = `msg-tts-retry-${Date.now()}-${Math.random().toString(36).slice(2)}`;
        setInternalStreamId(newId);
        const session = StreamPlaybackManager.createSession(newId, {
          ttsConfig,
          audioParams: { ...audioParams, speaker: fallbackVoice },
          autoPlay,
          metricsCollector,
          visualization,
          onPlayStart,
          onPlayPause,
          onPlayResume,
          onPlayEnd,
          onError: () => {
            StreamPlaybackManager.stop(newId);
            playFallback(text);
          }
        });
        // Fix: play() rejects when connecting fails (after onError has
        // already run); observe it so it is not an unhandled rejection.
        Promise.resolve(session.play(text)).catch((err) => {
          console.warn("[useMessageTTS] Fallback voice playback failed", err);
        });
      } else {
        playFallback(text);
      }
    },
    [
      fallbackVoice,
      playFallback,
      ttsConfig,
      audioParams,
      autoPlay,
      metricsCollector,
      visualization,
      onPlayStart,
      onPlayPause,
      onPlayResume,
      onPlayEnd
    ]
  );

  const play = useCallback3(
    async (text) => {
      if (isSubscriptionMode) {
        const streamSession = StreamPlaybackManager.getSession(externalStreamId || "");
        if (!streamSession) {
          console.log(
            "[useMessageTTS] Stream session not found, switching to independent play mode"
          );
          setIsSwitchedToIndependent(true);
        } else if (streamSession.state.isStreamFinished) {
          console.log(
            "[useMessageTTS] Stream finished, switching to independent play mode"
          );
          setIsSwitchedToIndependent(true);
        } else if (streamSession.state.isSynthesizing || streamSession.state.isPlaying) {
          console.warn(
            "[useMessageTTS] play() called in subscription mode while streaming, ignoring"
          );
          return;
        } else {
          console.log(
            "[useMessageTTS] Stream not active, switching to independent play mode"
          );
          setIsSwitchedToIndependent(true);
        }
      }
      currentTextRef.current = text;
      stop();
      setErrorState(null);
      isFallbackRef.current = false;
      const id = `msg-tts-${Date.now()}-${Math.random().toString(36).slice(2)}`;
      const session = StreamPlaybackManager.createSession(id, {
        ttsConfig,
        audioParams,
        autoPlay,
        metricsCollector,
        visualization,
        onPlayStart,
        onPlayPause,
        onPlayResume,
        onPlayEnd,
        onError: () => {
          handleError(text, audioParams?.speaker || "", id);
        }
      });
      setInternalStreamId(id);
      await session.play(text);
    },
    [
      isSubscriptionMode,
      externalStreamId,
      stop,
      ttsConfig,
      audioParams,
      autoPlay,
      metricsCollector,
      visualization,
      onPlayStart,
      onPlayPause,
      onPlayResume,
      onPlayEnd,
      handleError
    ]
  );

  const seek = useCallback3(
    (percentage) => {
      if (streamId) {
        StreamPlaybackManager.getSession(streamId)?.seek(percentage);
      }
    },
    [streamId]
  );
  const getFrequencyData = useCallback3(
    () => state.visualizationData.frequencyData,
    [state.visualizationData]
  );
  const getTimeDomainData = useCallback3(
    () => state.visualizationData.timeDomainData,
    [state.visualizationData]
  );
  const isStreamActive = !!(externalStreamId && (state.isPlaying || state.isPaused || state.isSynthesizing));
  // True when a session is still registered for the current stream id.
  const canResume = useCallback3(() => {
    if (!streamId) return false;
    const session = StreamPlaybackManager.getSession(streamId);
    return !!session;
  }, [streamId]);
  return {
    isPlaying: state.isPlaying,
    isPaused: state.isPaused,
    isSynthesizing: state.isSynthesizing,
    progress: state.progress,
    error,
    play,
    pause,
    resume,
    stop,
    togglePlay,
    seek,
    getFrequencyData,
    getTimeDomainData,
    visualizationData: state.visualizationData,
    isStreamActive,
    streamState: state,
    canResume
  };
}
|
|
1253
1653
|
|
|
1254
1654
|
// src/tts/useStreamTTS.ts
|
|
1255
|
-
|
|
1256
|
-
var activeInstances2 = /* @__PURE__ */ new Map();
|
|
1257
|
-
var sessionAudioCache = /* @__PURE__ */ new Map();
|
|
1258
|
-
function buildFullUrl3(url, params) {
|
|
1259
|
-
const arr = [];
|
|
1260
|
-
for (const key in params) {
|
|
1261
|
-
if (Object.prototype.hasOwnProperty.call(params, key)) {
|
|
1262
|
-
arr.push(`${key}=${encodeURIComponent(params[key])}`);
|
|
1263
|
-
}
|
|
1264
|
-
}
|
|
1265
|
-
return `${url}?${arr.join("&")}`;
|
|
1266
|
-
}
|
|
1655
|
+
import { useCallback as useCallback4, useEffect as useEffect3, useRef as useRef4, useState as useState4 } from "react";
|
|
1267
1656
|
function useStreamTTS({
|
|
1268
1657
|
ttsConfig,
|
|
1269
1658
|
audioParams,
|
|
@@ -1277,427 +1666,131 @@ function useStreamTTS({
|
|
|
1277
1666
|
visualization,
|
|
1278
1667
|
maxSegmentLength = 150
|
|
1279
1668
|
}) {
|
|
1280
|
-
const [
|
|
1281
|
-
const
|
|
1282
|
-
const [
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1669
|
+
const [streamId, setStreamId] = useState4("");
|
|
1670
|
+
const streamIdRef = useRef4("");
|
|
1671
|
+
const [state, setState] = useState4({
|
|
1672
|
+
isPlaying: false,
|
|
1673
|
+
isPaused: false,
|
|
1674
|
+
isSynthesizing: false,
|
|
1675
|
+
progress: 0,
|
|
1676
|
+
visualizationData: {
|
|
1677
|
+
frequencyData: new Uint8Array(0),
|
|
1678
|
+
timeDomainData: new Uint8Array(0)
|
|
1679
|
+
},
|
|
1680
|
+
error: null,
|
|
1681
|
+
isConnected: false,
|
|
1682
|
+
isSessionStarted: false,
|
|
1683
|
+
isStreamFinished: false
|
|
1291
1684
|
});
|
|
1292
|
-
const
|
|
1293
|
-
const clientRef = useRef4(null);
|
|
1294
|
-
const audioRef = useRef4(null);
|
|
1295
|
-
const audioContextRef = useRef4(null);
|
|
1296
|
-
const analyserRef = useRef4(null);
|
|
1297
|
-
const sourceRef = useRef4(null);
|
|
1298
|
-
const audioUrlRef = useRef4(null);
|
|
1685
|
+
const [streamText, setStreamText] = useState4("");
|
|
1299
1686
|
const streamTextRef = useRef4("");
|
|
1300
|
-
const isConnectedRef = useRef4(false);
|
|
1301
|
-
const isSessionStartedRef = useRef4(false);
|
|
1302
|
-
const calledSessionStartedRef = useRef4(false);
|
|
1303
|
-
const splitterRef = useRef4(null);
|
|
1304
|
-
const segmentQueueRef = useRef4([]);
|
|
1305
|
-
const isSendingRef = useRef4(false);
|
|
1306
|
-
const sessionAudioBuffersRef = useRef4([]);
|
|
1307
|
-
const isStreamFinishedRef = useRef4(false);
|
|
1308
|
-
const isSessionFinishedRef = useRef4(false);
|
|
1309
|
-
const resolveAllSegmentsSentRef = useRef4(null);
|
|
1310
|
-
const currentVoiceRef = useRef4("");
|
|
1311
|
-
const initAudioContext = useCallback4(() => {
|
|
1312
|
-
if (!audioRef.current) return;
|
|
1313
|
-
if (!audioContextRef.current) {
|
|
1314
|
-
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
1315
|
-
audioContextRef.current = new AudioContextClass();
|
|
1316
|
-
}
|
|
1317
|
-
if (audioContextRef.current.state === "suspended") {
|
|
1318
|
-
audioContextRef.current.resume();
|
|
1319
|
-
}
|
|
1320
|
-
if (!analyserRef.current) {
|
|
1321
|
-
analyserRef.current = audioContextRef.current.createAnalyser();
|
|
1322
|
-
analyserRef.current.fftSize = visualization?.fftSize || 256;
|
|
1323
|
-
}
|
|
1324
|
-
if (!sourceRef.current) {
|
|
1325
|
-
try {
|
|
1326
|
-
sourceRef.current = audioContextRef.current.createMediaElementSource(audioRef.current);
|
|
1327
|
-
sourceRef.current.connect(analyserRef.current);
|
|
1328
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
1329
|
-
} catch (e) {
|
|
1330
|
-
}
|
|
1331
|
-
}
|
|
1332
|
-
}, [visualization?.fftSize]);
|
|
1333
|
-
const cleanupAudio = useCallback4(() => {
|
|
1334
|
-
if (audioUrlRef.current) {
|
|
1335
|
-
URL.revokeObjectURL(audioUrlRef.current);
|
|
1336
|
-
audioUrlRef.current = null;
|
|
1337
|
-
}
|
|
1338
|
-
if (audioRef.current) {
|
|
1339
|
-
audioRef.current.onerror = null;
|
|
1340
|
-
audioRef.current.onended = null;
|
|
1341
|
-
audioRef.current.onpause = null;
|
|
1342
|
-
audioRef.current.onplay = null;
|
|
1343
|
-
audioRef.current.ontimeupdate = null;
|
|
1344
|
-
audioRef.current.pause();
|
|
1345
|
-
audioRef.current.src = "";
|
|
1346
|
-
audioRef.current = null;
|
|
1347
|
-
}
|
|
1348
|
-
if (sourceRef.current) {
|
|
1349
|
-
try {
|
|
1350
|
-
sourceRef.current.disconnect();
|
|
1351
|
-
} catch (e) {
|
|
1352
|
-
}
|
|
1353
|
-
sourceRef.current = null;
|
|
1354
|
-
}
|
|
1355
|
-
}, []);
|
|
1356
|
-
const stopOthers = useCallback4(() => {
|
|
1357
|
-
activeInstances2.forEach((instance, id) => {
|
|
1358
|
-
if (id !== instanceId) {
|
|
1359
|
-
instance.pause();
|
|
1360
|
-
}
|
|
1361
|
-
});
|
|
1362
|
-
}, [instanceId]);
|
|
1363
|
-
const pause = useCallback4(() => {
|
|
1364
|
-
if (audioRef.current) {
|
|
1365
|
-
audioRef.current.pause();
|
|
1366
|
-
}
|
|
1367
|
-
setIsPaused(true);
|
|
1368
|
-
setIsPlaying(false);
|
|
1369
|
-
onPlayPause?.();
|
|
1370
|
-
}, [onPlayPause]);
|
|
1371
|
-
const resume = useCallback4(() => {
|
|
1372
|
-
stopOthers();
|
|
1373
|
-
if (audioRef.current) {
|
|
1374
|
-
audioRef.current.play();
|
|
1375
|
-
}
|
|
1376
|
-
setIsPaused(false);
|
|
1377
|
-
setIsPlaying(true);
|
|
1378
|
-
onPlayResume?.();
|
|
1379
|
-
activeInstances2.set(instanceId, { pause });
|
|
1380
|
-
}, [stopOthers, instanceId, pause, onPlayResume]);
|
|
1381
|
-
const sendNextSegment = useCallback4(() => {
|
|
1382
|
-
if (!clientRef.current || !isSessionStartedRef.current || isSendingRef.current || isSessionFinishedRef.current) {
|
|
1383
|
-
return;
|
|
1384
|
-
}
|
|
1385
|
-
if (segmentQueueRef.current.length === 0) {
|
|
1386
|
-
if (isStreamFinishedRef.current && !isSessionFinishedRef.current) {
|
|
1387
|
-
console.log("[useStreamTTS] All segments sent, finishing session");
|
|
1388
|
-
isSessionFinishedRef.current = true;
|
|
1389
|
-
clientRef.current.finishSession();
|
|
1390
|
-
resolveAllSegmentsSentRef.current?.();
|
|
1391
|
-
}
|
|
1392
|
-
return;
|
|
1393
|
-
}
|
|
1394
|
-
isSendingRef.current = true;
|
|
1395
|
-
const segment = segmentQueueRef.current.shift();
|
|
1396
|
-
console.log(`[useStreamTTS] Sending segment ${segment.index}: ${segment.content.substring(0, 30)}...`);
|
|
1397
|
-
clientRef.current.sendText(segment.content);
|
|
1398
|
-
segment.sent = true;
|
|
1399
|
-
isSendingRef.current = false;
|
|
1400
|
-
setTimeout(() => sendNextSegment(), 0);
|
|
1401
|
-
}, []);
|
|
1402
|
-
const stop = useCallback4(() => {
|
|
1403
|
-
if (clientRef.current) {
|
|
1404
|
-
clientRef.current.close();
|
|
1405
|
-
clientRef.current = null;
|
|
1406
|
-
}
|
|
1407
|
-
cleanupAudio();
|
|
1408
|
-
setIsConnected(false);
|
|
1409
|
-
isConnectedRef.current = false;
|
|
1410
|
-
setIsSessionStarted(false);
|
|
1411
|
-
isSessionStartedRef.current = false;
|
|
1412
|
-
calledSessionStartedRef.current = false;
|
|
1413
|
-
setIsPlaying(false);
|
|
1414
|
-
setIsPaused(false);
|
|
1415
|
-
setIsSynthesizing(false);
|
|
1416
|
-
setProgress(0);
|
|
1417
|
-
activeInstances2.delete(instanceId);
|
|
1418
|
-
streamTextRef.current = "";
|
|
1419
|
-
setStreamText("");
|
|
1420
|
-
segmentQueueRef.current = [];
|
|
1421
|
-
isSendingRef.current = false;
|
|
1422
|
-
sessionAudioBuffersRef.current = [];
|
|
1423
|
-
isStreamFinishedRef.current = false;
|
|
1424
|
-
isSessionFinishedRef.current = false;
|
|
1425
|
-
splitterRef.current?.reset();
|
|
1426
|
-
}, [cleanupAudio, instanceId]);
|
|
1427
1687
|
const connect = useCallback4(async () => {
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
sessionAudioBuffersRef.current = [];
|
|
1432
|
-
isStreamFinishedRef.current = false;
|
|
1688
|
+
const newStreamId = `tts-stream-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1689
|
+
setStreamId(newStreamId);
|
|
1690
|
+
streamIdRef.current = newStreamId;
|
|
1433
1691
|
streamTextRef.current = "";
|
|
1434
1692
|
setStreamText("");
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
try {
|
|
1450
|
-
const audio = new Audio();
|
|
1451
|
-
audio.crossOrigin = "anonymous";
|
|
1452
|
-
audioRef.current = audio;
|
|
1453
|
-
audio.onplay = () => {
|
|
1454
|
-
setIsPlaying(true);
|
|
1455
|
-
setIsPaused(false);
|
|
1456
|
-
onPlayStart?.();
|
|
1457
|
-
initAudioContext();
|
|
1458
|
-
activeInstances2.set(instanceId, { pause });
|
|
1459
|
-
};
|
|
1460
|
-
audio.onended = () => {
|
|
1461
|
-
setIsPlaying(false);
|
|
1462
|
-
setIsPaused(false);
|
|
1463
|
-
onPlayEnd?.();
|
|
1464
|
-
activeInstances2.delete(instanceId);
|
|
1465
|
-
};
|
|
1466
|
-
audio.onerror = (e) => {
|
|
1467
|
-
console.error("[useStreamTTS] Audio playback error:", e, audio.error);
|
|
1468
|
-
setErrorState(audio.error?.message || "Audio playback error");
|
|
1469
|
-
onError?.(new Error(audio.error?.message || "Audio playback error"));
|
|
1470
|
-
};
|
|
1471
|
-
audio.ontimeupdate = () => {
|
|
1472
|
-
let duration = audio.duration;
|
|
1473
|
-
if (!isFinite(duration) && audio.buffered.length > 0) {
|
|
1474
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
1475
|
-
}
|
|
1476
|
-
if (isFinite(duration) && duration > 0) {
|
|
1477
|
-
setProgress(audio.currentTime / duration * 100);
|
|
1478
|
-
}
|
|
1479
|
-
};
|
|
1480
|
-
clientRef.current = WebsocketMSE3({ autoStartSession: false });
|
|
1481
|
-
splitterRef.current = new StreamingTextSplitter({
|
|
1482
|
-
maxLength: maxSegmentLength,
|
|
1483
|
-
onSegmentComplete: (segment) => {
|
|
1484
|
-
segmentQueueRef.current.push(segment);
|
|
1485
|
-
console.log(`[useStreamTTS] Segment ${segment.index} queued (${segment.length} chars)`);
|
|
1486
|
-
if (isSessionStartedRef.current) {
|
|
1487
|
-
sendNextSegment();
|
|
1488
|
-
}
|
|
1489
|
-
},
|
|
1490
|
-
onAllComplete: () => {
|
|
1491
|
-
console.log(`[useStreamTTS] All segments completed, total: ${segmentQueueRef.current.length} in queue`);
|
|
1492
|
-
}
|
|
1493
|
-
});
|
|
1494
|
-
const url = clientRef.current.start({
|
|
1495
|
-
url: buildFullUrl3(WS_URL2, {
|
|
1496
|
-
api_access_key: `Jwt; ${ttsConfig.token}`,
|
|
1497
|
-
api_app_key: ttsConfig.appid,
|
|
1498
|
-
api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
|
|
1499
|
-
}),
|
|
1500
|
-
config: {
|
|
1501
|
-
user: {
|
|
1502
|
-
uid: `req-${Date.now()}`
|
|
1503
|
-
},
|
|
1504
|
-
namespace: ttsConfig.namespace || "BidirectionalTTS",
|
|
1505
|
-
req_params: {
|
|
1506
|
-
speaker: voice,
|
|
1507
|
-
audio_params: {
|
|
1508
|
-
sample_rate: audioParams?.sample_rate || 24e3,
|
|
1509
|
-
format: audioParams?.format || "mp3",
|
|
1510
|
-
speech_rate: audioParams?.speech_rate,
|
|
1511
|
-
pitch_rate: audioParams?.pitch_rate,
|
|
1512
|
-
loudness_rate: audioParams?.loudness_rate
|
|
1513
|
-
},
|
|
1514
|
-
additions: JSON.stringify({
|
|
1515
|
-
enable_language_detector: true,
|
|
1516
|
-
disable_markdown_filter: true,
|
|
1517
|
-
enable_latex_tn: true
|
|
1518
|
-
})
|
|
1519
|
-
}
|
|
1520
|
-
},
|
|
1521
|
-
// ===== 关键回调 =====
|
|
1522
|
-
onStart: () => {
|
|
1523
|
-
setIsConnected(true);
|
|
1524
|
-
isConnectedRef.current = true;
|
|
1525
|
-
console.log("[useStreamTTS] WebSocket connected, waiting for text...");
|
|
1526
|
-
},
|
|
1527
|
-
onSessionStarted: () => {
|
|
1528
|
-
setIsSessionStarted(true);
|
|
1529
|
-
isSessionStartedRef.current = true;
|
|
1530
|
-
console.log("[useStreamTTS] Session started, can send text now");
|
|
1531
|
-
if (segmentQueueRef.current.length > 0) {
|
|
1532
|
-
sendNextSegment();
|
|
1533
|
-
}
|
|
1534
|
-
},
|
|
1535
|
-
onMessage: (data) => {
|
|
1536
|
-
setIsSynthesizing(true);
|
|
1537
|
-
if (sessionAudioBuffersRef.current.length === 0) {
|
|
1538
|
-
metricsCollector.record({
|
|
1539
|
-
name: "tts_latency",
|
|
1540
|
-
labels: { stage: "first_packet", voice },
|
|
1541
|
-
value: Date.now() - startTime,
|
|
1542
|
-
timestamp: Date.now()
|
|
1543
|
-
});
|
|
1544
|
-
}
|
|
1545
|
-
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
1546
|
-
sessionAudioBuffersRef.current.push(buffer);
|
|
1547
|
-
},
|
|
1548
|
-
onSessionFinished: () => {
|
|
1549
|
-
setIsSynthesizing(false);
|
|
1550
|
-
setIsSessionStarted(false);
|
|
1551
|
-
isSessionStartedRef.current = false;
|
|
1552
|
-
calledSessionStartedRef.current = false;
|
|
1553
|
-
if (sessionAudioBuffersRef.current.length > 0 && streamTextRef.current) {
|
|
1554
|
-
const speed = audioParams?.speech_rate || 0;
|
|
1555
|
-
const cacheKey = TTSCache.generateKey(streamTextRef.current, voice, speed);
|
|
1556
|
-
TTSCache.set(cacheKey, [...sessionAudioBuffersRef.current]);
|
|
1557
|
-
sessionAudioCache.set(instanceId, {
|
|
1558
|
-
streamText: streamTextRef.current,
|
|
1559
|
-
audioBuffers: [...sessionAudioBuffersRef.current],
|
|
1560
|
-
timestamp: Date.now(),
|
|
1561
|
-
voice,
|
|
1562
|
-
speed
|
|
1563
|
-
});
|
|
1564
|
-
console.log(`[useStreamTTS] Session finished, cached ${sessionAudioBuffersRef.current.length} audio buffers`);
|
|
1565
|
-
}
|
|
1566
|
-
metricsCollector.record({
|
|
1567
|
-
name: "tts_synthesis_finished",
|
|
1568
|
-
labels: { voice, text_length: streamTextRef.current.length },
|
|
1569
|
-
value: Date.now() - startTime,
|
|
1570
|
-
timestamp: Date.now()
|
|
1571
|
-
});
|
|
1572
|
-
},
|
|
1573
|
-
onError: (err) => {
|
|
1574
|
-
console.error("[useStreamTTS] TTS error:", err);
|
|
1575
|
-
setErrorState(err.msg || "TTS error");
|
|
1576
|
-
onError?.(new Error(err.msg || "TTS error"));
|
|
1577
|
-
setIsSynthesizing(false);
|
|
1578
|
-
}
|
|
1579
|
-
});
|
|
1580
|
-
audioUrlRef.current = url;
|
|
1581
|
-
audio.src = url;
|
|
1582
|
-
if (autoPlay) {
|
|
1583
|
-
try {
|
|
1584
|
-
await audio.play();
|
|
1585
|
-
} catch (e) {
|
|
1586
|
-
console.warn("[useStreamTTS] Autoplay blocked:", e);
|
|
1587
|
-
}
|
|
1693
|
+
const session = StreamPlaybackManager.createSession(newStreamId, {
|
|
1694
|
+
ttsConfig,
|
|
1695
|
+
audioParams,
|
|
1696
|
+
autoPlay,
|
|
1697
|
+
metricsCollector,
|
|
1698
|
+
visualization,
|
|
1699
|
+
maxSegmentLength,
|
|
1700
|
+
onPlayStart,
|
|
1701
|
+
onPlayPause,
|
|
1702
|
+
onPlayResume,
|
|
1703
|
+
onPlayEnd,
|
|
1704
|
+
onError: (err) => {
|
|
1705
|
+
setState((prev) => ({ ...prev, error: err.message }));
|
|
1706
|
+
onError?.(err);
|
|
1588
1707
|
}
|
|
1589
|
-
}
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
onError?.(err instanceof Error ? err : new Error(String(err)));
|
|
1593
|
-
}
|
|
1708
|
+
});
|
|
1709
|
+
await session.connect();
|
|
1710
|
+
return newStreamId;
|
|
1594
1711
|
}, [
|
|
1595
1712
|
ttsConfig,
|
|
1596
1713
|
audioParams,
|
|
1597
1714
|
autoPlay,
|
|
1598
|
-
stop,
|
|
1599
|
-
instanceId,
|
|
1600
|
-
onPlayStart,
|
|
1601
|
-
onPlayEnd,
|
|
1602
|
-
initAudioContext,
|
|
1603
|
-
pause,
|
|
1604
1715
|
metricsCollector,
|
|
1716
|
+
visualization,
|
|
1605
1717
|
maxSegmentLength,
|
|
1606
|
-
|
|
1718
|
+
onPlayStart,
|
|
1719
|
+
onPlayPause,
|
|
1720
|
+
onPlayResume,
|
|
1721
|
+
onPlayEnd,
|
|
1607
1722
|
onError
|
|
1608
1723
|
]);
|
|
1724
|
+
useEffect3(() => {
|
|
1725
|
+
if (!streamId) return;
|
|
1726
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1727
|
+
if (!session) return;
|
|
1728
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1729
|
+
setState(newState);
|
|
1730
|
+
});
|
|
1731
|
+
return () => {
|
|
1732
|
+
unsubscribe();
|
|
1733
|
+
};
|
|
1734
|
+
}, [streamId]);
|
|
1609
1735
|
const onMessage = useCallback4((chunk) => {
|
|
1610
|
-
if (!
|
|
1736
|
+
if (!streamIdRef.current) return;
|
|
1611
1737
|
streamTextRef.current += chunk;
|
|
1612
1738
|
setStreamText(streamTextRef.current);
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
calledSessionStartedRef.current = true;
|
|
1616
|
-
clientRef.current.startSession();
|
|
1617
|
-
}
|
|
1618
|
-
splitterRef.current?.onChunk(chunk);
|
|
1739
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1740
|
+
session?.handleStreamChunk(chunk);
|
|
1619
1741
|
}, []);
|
|
1620
1742
|
const finishStream = useCallback4(async () => {
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
if (segmentQueueRef.current.length > 0 || isSendingRef.current) {
|
|
1625
|
-
await new Promise((resolve) => {
|
|
1626
|
-
resolveAllSegmentsSentRef.current = resolve;
|
|
1627
|
-
});
|
|
1628
|
-
} else if (clientRef.current && isSessionStartedRef.current && !isSessionFinishedRef.current) {
|
|
1629
|
-
isSessionFinishedRef.current = true;
|
|
1630
|
-
clientRef.current.finishSession();
|
|
1631
|
-
}
|
|
1743
|
+
if (!streamIdRef.current) return;
|
|
1744
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1745
|
+
await session?.finishStream();
|
|
1632
1746
|
}, []);
|
|
1633
|
-
const
|
|
1634
|
-
if (
|
|
1635
|
-
|
|
1636
|
-
if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
|
|
1637
|
-
duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
|
|
1638
|
-
}
|
|
1639
|
-
if (isFinite(duration) && duration > 0) {
|
|
1640
|
-
const time = percentage / 100 * duration;
|
|
1641
|
-
if (isFinite(time)) {
|
|
1642
|
-
audioRef.current.currentTime = time;
|
|
1643
|
-
setProgress(percentage);
|
|
1644
|
-
}
|
|
1645
|
-
}
|
|
1747
|
+
const pause = useCallback4(() => {
|
|
1748
|
+
if (streamIdRef.current) {
|
|
1749
|
+
StreamPlaybackManager.pause(streamIdRef.current);
|
|
1646
1750
|
}
|
|
1647
1751
|
}, []);
|
|
1648
|
-
const
|
|
1649
|
-
if (
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
return dataArray;
|
|
1752
|
+
const resume = useCallback4(() => {
|
|
1753
|
+
if (streamIdRef.current) {
|
|
1754
|
+
StreamPlaybackManager.resume(streamIdRef.current);
|
|
1755
|
+
}
|
|
1653
1756
|
}, []);
|
|
1654
|
-
const
|
|
1655
|
-
if (
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1757
|
+
const stop = useCallback4(() => {
|
|
1758
|
+
if (streamIdRef.current) {
|
|
1759
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1760
|
+
setStreamId("");
|
|
1761
|
+
streamIdRef.current = "";
|
|
1762
|
+
}
|
|
1659
1763
|
}, []);
|
|
1660
|
-
|
|
1661
|
-
if (
|
|
1662
|
-
|
|
1663
|
-
let lastUpdate = 0;
|
|
1664
|
-
const interval = visualization.refreshInterval || 0;
|
|
1665
|
-
const update = (timestamp) => {
|
|
1666
|
-
if (isPlaying && !isPaused) {
|
|
1667
|
-
if (timestamp - lastUpdate >= interval) {
|
|
1668
|
-
setVisualizationData({
|
|
1669
|
-
frequencyData: getFrequencyData(),
|
|
1670
|
-
timeDomainData: getTimeDomainData()
|
|
1671
|
-
});
|
|
1672
|
-
lastUpdate = timestamp;
|
|
1673
|
-
}
|
|
1674
|
-
animId = requestAnimationFrame(update);
|
|
1675
|
-
}
|
|
1676
|
-
};
|
|
1677
|
-
if (isPlaying && !isPaused) {
|
|
1678
|
-
animId = requestAnimationFrame(update);
|
|
1764
|
+
const seek = useCallback4((percentage) => {
|
|
1765
|
+
if (streamIdRef.current) {
|
|
1766
|
+
StreamPlaybackManager.getSession(streamIdRef.current)?.seek(percentage);
|
|
1679
1767
|
}
|
|
1680
|
-
|
|
1681
|
-
if (animId) cancelAnimationFrame(animId);
|
|
1682
|
-
};
|
|
1683
|
-
}, [isPlaying, isPaused, visualization, getFrequencyData, getTimeDomainData]);
|
|
1768
|
+
}, []);
|
|
1684
1769
|
useEffect3(() => {
|
|
1685
1770
|
return () => {
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
audioContextRef.current.close();
|
|
1771
|
+
if (streamIdRef.current) {
|
|
1772
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1689
1773
|
}
|
|
1690
1774
|
};
|
|
1691
|
-
}, [
|
|
1775
|
+
}, []);
|
|
1776
|
+
const getFrequencyData = useCallback4(
|
|
1777
|
+
() => state.visualizationData.frequencyData,
|
|
1778
|
+
[state.visualizationData]
|
|
1779
|
+
);
|
|
1780
|
+
const getTimeDomainData = useCallback4(
|
|
1781
|
+
() => state.visualizationData.timeDomainData,
|
|
1782
|
+
[state.visualizationData]
|
|
1783
|
+
);
|
|
1692
1784
|
return {
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1785
|
+
streamId,
|
|
1786
|
+
isConnected: state.isConnected,
|
|
1787
|
+
isSessionStarted: state.isSessionStarted,
|
|
1788
|
+
isSynthesizing: state.isSynthesizing,
|
|
1789
|
+
isPlaying: state.isPlaying,
|
|
1790
|
+
isPaused: state.isPaused,
|
|
1791
|
+
error: state.error,
|
|
1699
1792
|
streamText,
|
|
1700
|
-
progress,
|
|
1793
|
+
progress: state.progress,
|
|
1701
1794
|
connect,
|
|
1702
1795
|
onMessage,
|
|
1703
1796
|
finishStream,
|
|
@@ -1707,23 +1800,9 @@ function useStreamTTS({
|
|
|
1707
1800
|
seek,
|
|
1708
1801
|
getFrequencyData,
|
|
1709
1802
|
getTimeDomainData,
|
|
1710
|
-
visualizationData
|
|
1803
|
+
visualizationData: state.visualizationData
|
|
1711
1804
|
};
|
|
1712
1805
|
}
|
|
1713
|
-
function getSessionAudioCache(instanceId) {
|
|
1714
|
-
return sessionAudioCache.get(instanceId);
|
|
1715
|
-
}
|
|
1716
|
-
function clearSessionAudioCache(instanceId) {
|
|
1717
|
-
sessionAudioCache.delete(instanceId);
|
|
1718
|
-
}
|
|
1719
|
-
function findSessionCacheByText(streamText, voice, speed) {
|
|
1720
|
-
for (const entry of sessionAudioCache.values()) {
|
|
1721
|
-
if (entry.streamText === streamText && entry.voice === voice && entry.speed === speed) {
|
|
1722
|
-
return entry;
|
|
1723
|
-
}
|
|
1724
|
-
}
|
|
1725
|
-
return void 0;
|
|
1726
|
-
}
|
|
1727
1806
|
|
|
1728
1807
|
// src/components/AudioWaveVisualizer.tsx
|
|
1729
1808
|
import { useEffect as useEffect4, useRef as useRef5 } from "react";
|
|
@@ -2098,10 +2177,8 @@ var AudioProgressBar_default = AudioProgressBar;
|
|
|
2098
2177
|
export {
|
|
2099
2178
|
AudioProgressBar_default as AudioProgressBar,
|
|
2100
2179
|
AudioWaveVisualizer_default as AudioWaveVisualizer,
|
|
2180
|
+
StreamPlaybackManager,
|
|
2101
2181
|
StreamingTextSplitter,
|
|
2102
|
-
clearSessionAudioCache,
|
|
2103
|
-
findSessionCacheByText,
|
|
2104
|
-
getSessionAudioCache,
|
|
2105
2182
|
splitTextByDelimiters,
|
|
2106
2183
|
useMessageTTS,
|
|
2107
2184
|
useStreamTTS,
|