@wq-hook/volcano-react 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +138 -40
- package/dist/index.d.ts +138 -40
- package/dist/index.js +1182 -1107
- package/dist/index.mjs +1181 -1104
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -32,10 +32,8 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
AudioProgressBar: () => AudioProgressBar_default,
|
|
34
34
|
AudioWaveVisualizer: () => AudioWaveVisualizer_default,
|
|
35
|
+
StreamPlaybackManager: () => StreamPlaybackManager,
|
|
35
36
|
StreamingTextSplitter: () => StreamingTextSplitter,
|
|
36
|
-
clearSessionAudioCache: () => clearSessionAudioCache,
|
|
37
|
-
findSessionCacheByText: () => findSessionCacheByText,
|
|
38
|
-
getSessionAudioCache: () => getSessionAudioCache,
|
|
39
37
|
splitTextByDelimiters: () => splitTextByDelimiters,
|
|
40
38
|
useMessageTTS: () => useMessageTTS,
|
|
41
39
|
useStreamTTS: () => useStreamTTS,
|
|
@@ -442,10 +440,253 @@ function useVolcanoTTS({
|
|
|
442
440
|
}
|
|
443
441
|
|
|
444
442
|
// src/tts/useMessageTTS.ts
|
|
443
|
+
var import_react3 = require("react");
|
|
444
|
+
|
|
445
|
+
// src/tts/StreamPlaybackManager.ts
|
|
445
446
|
var import_tts2 = require("@wq-hook/volcano-sdk/tts");
|
|
447
|
+
|
|
448
|
+
// src/tts/StreamingTextSplitter.ts
|
|
446
449
|
var import_volcano_sdk2 = require("@wq-hook/volcano-sdk");
|
|
447
|
-
var import_react3 = require("react");
|
|
448
450
|
var import_emoji_regex2 = __toESM(require("emoji-regex"));
|
|
451
|
+
/**
 * Incrementally splits streamed text into segments.
 *
 * Text is accumulated in an internal buffer. A segment is emitted when a
 * newline terminates a line, or when the buffer grows to `maxLength` and has
 * to be split by force. Before being emitted, text is passed through
 * `MarkdownFormatter.format` and emoji matches are removed.
 */
var StreamingTextSplitter = class {
  /**
   * @param {object} [options]
   * @param {number} [options.maxLength=150] - Buffer/segment length that triggers a forced split.
   * @param {number} [options.minLength=10] - A sentence boundary is only used for a forced
   *   split when it lies beyond this index.
   * @param {(segment: object) => void} [options.onSegmentComplete] - Called for every emitted segment.
   * @param {(segments: object[]) => void} [options.onAllComplete] - Called once from `complete()`.
   */
  constructor(options = {}) {
    /** Text received so far that has not been emitted yet. */
    this.buffer = "";
    /** Index assigned to the next emitted segment. */
    this.segmentIndex = 0;
    /** Every segment emitted since construction or the last `reset()`. */
    this.segments = [];
    /** Set by `complete()`; further chunks are ignored afterwards. */
    this.isCompleted = false;
    this.maxLength = options.maxLength || 150;
    this.minLength = options.minLength || 10;
    this.onSegmentComplete = options.onSegmentComplete;
    this.onAllComplete = options.onAllComplete;
  }
  /**
   * Receives one chunk of streamed text.
   *
   * Every line that is already terminated by a newline is flushed right away,
   * including when the buffer happens to start with a newline (the previous
   * implementation returned early in that case and left complete lines
   * waiting for the next newline-bearing chunk).
   *
   * @param chunk - Text chunk; empty chunks and chunks after `complete()` are ignored.
   */
  onChunk(chunk) {
    if (!chunk || this.isCompleted) return;
    this.buffer += chunk;
    if (this.detectBoundary(chunk)) {
      this.drainCompleteLines();
    }
  }
  /**
   * Flushes every newline-terminated line currently in the buffer.
   * Empty lines are dropped; text after the last newline stays buffered.
   */
  drainCompleteLines() {
    let newlineIndex = this.buffer.indexOf("\n");
    while (newlineIndex !== -1) {
      const line = this.buffer.substring(0, newlineIndex);
      this.buffer = this.buffer.substring(newlineIndex + 1);
      if (newlineIndex > 0) {
        this.flushSegmentWithBuffer(line);
      }
      newlineIndex = this.buffer.indexOf("\n");
    }
  }
  /**
   * Decides whether the latest chunk created a segment boundary.
   * As a side effect, a buffer that reached `maxLength` is force-split.
   *
   * @param chunk - The chunk that was just appended to the buffer.
   * @returns `true` when the chunk contains a newline or the buffer reached `maxLength`.
   */
  detectBoundary(chunk) {
    const hasNewline = chunk.includes("\n");
    const isOverlong = this.buffer.length >= this.maxLength;
    if (isOverlong) {
      this.forceSplitAtSentenceBoundary();
    }
    return hasNewline || isOverlong;
  }
  /**
   * Splits the buffer in two, emits the first part and keeps the second part buffered.
   *
   * The split is made right after the last sentence-ending mark when that mark
   * lies beyond `minLength`; otherwise the buffer is cut at its midpoint.
   */
  forceSplitAtSentenceBoundary() {
    const content = this.buffer;
    // CJK full stop plus ASCII and full-width question/exclamation marks.
    // Escapes are used so the set survives width normalisation of this file.
    const sentenceEnder = /[\u3002?!\uFF1F\uFF01]/;
    let lastEnderIndex = -1;
    for (let i = content.length - 1; i >= 0; i--) {
      if (sentenceEnder.test(content[i])) {
        lastEnderIndex = i;
        break;
      }
    }
    let splitPoint;
    if (lastEnderIndex > this.minLength) {
      splitPoint = lastEnderIndex + 1;
    } else {
      splitPoint = Math.floor(content.length / 2);
      // Never cut between the two halves of a surrogate pair: a lone
      // surrogate would no longer be matched by the emoji filter.
      const previousUnit = content.charCodeAt(splitPoint - 1);
      if (previousUnit >= 0xd800 && previousUnit <= 0xdbff) {
        splitPoint += 1;
      }
    }
    const remainder = content.substring(splitPoint);
    this.buffer = content.substring(0, splitPoint);
    this.flushSegment();
    this.buffer = remainder;
  }
  /**
   * Formats `content` and emits it as one or more segments.
   *
   * Nothing is emitted when the content is empty, when it is shorter than
   * three characters and contains no letter or digit, or when nothing is left
   * after formatting. Formatted text longer than `maxLength` is divided by
   * `splitLongSegment`.
   *
   * @param content - Raw text to emit.
   */
  emitSegments(content) {
    if (!content) return;
    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
    const isTooShort = content.length < 3;
    if (isPureSymbols && isTooShort) return;
    const formattedContent = import_volcano_sdk2.MarkdownFormatter.format(content).replace((0, import_emoji_regex2.default)(), "");
    if (!formattedContent) return;
    const subSegments = formattedContent.length > this.maxLength ? this.splitLongSegment(formattedContent) : [formattedContent];
    for (const subSegment of subSegments) {
      if (!subSegment) continue;
      const segment = {
        index: this.segmentIndex++,
        content: subSegment,
        length: subSegment.length,
        sent: false
      };
      this.segments.push(segment);
      this.onSegmentComplete?.(segment);
    }
  }
  /**
   * Emits the given text as segment(s) without touching the internal buffer.
   * @param bufferToFlush - Text to emit; used as-is (not trimmed).
   */
  flushSegmentWithBuffer(bufferToFlush) {
    this.emitSegments(bufferToFlush);
  }
  /**
   * Emits the trimmed internal buffer as segment(s) and then empties the buffer.
   */
  flushSegment() {
    this.emitSegments(this.buffer.trim());
    this.buffer = "";
  }
  /**
   * Divides an over-long segment into smaller pieces.
   *
   * A piece ends after every sentence or clause mark (full stop, question
   * mark, exclamation mark, comma - ASCII and full-width forms), or as soon
   * as it reaches `maxLength`.
   *
   * @param segment - The over-long text.
   * @returns The non-empty pieces, in order.
   */
  splitLongSegment(segment) {
    // \u3002 CJK full stop, \uFF1F/\uFF01/\uFF0C full-width ? ! and comma.
    const splitMark = /[\u3002?!,\uFF1F\uFF01\uFF0C]/;
    const result = [];
    let current = "";
    for (const char of segment) {
      current += char;
      const endsAtMark = splitMark.test(char) && current.length <= this.maxLength;
      if (endsAtMark || current.length >= this.maxLength) {
        result.push(current);
        current = "";
      }
    }
    if (current) {
      result.push(current);
    }
    return result.filter((s) => s.length > 0);
  }
  /**
   * Ends the stream: flushes all remaining buffered text and then calls
   * `onAllComplete` with the full segment list. Subsequent calls are no-ops.
   */
  complete() {
    if (this.isCompleted) return;
    this.isCompleted = true;
    this.drainCompleteLines();
    if (this.buffer.trim()) {
      this.flushSegment();
    }
    this.onAllComplete?.(this.segments);
  }
  /**
   * Restores the initial state so the instance can be reused for a new stream.
   */
  reset() {
    this.buffer = "";
    this.segmentIndex = 0;
    this.segments = [];
    this.isCompleted = false;
  }
  /**
   * @returns The text that has been received but not emitted yet.
   */
  getBuffer() {
    return this.buffer;
  }
  /**
   * @returns The list of segments emitted so far (the internal array, not a copy).
   */
  getSegments() {
    return this.segments;
  }
  /**
   * @returns Current buffer length, number of emitted segments and the sum of their lengths.
   */
  getStats() {
    return {
      bufferLength: this.buffer.length,
      segmentCount: this.segments.length,
      totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
    };
  }
};
|
|
686
|
+
|
|
687
|
+
// src/tts/StreamPlaybackManager.ts
|
|
688
|
+
var import_emoji_regex3 = __toESM(require("emoji-regex"));
|
|
689
|
+
var import_volcano_sdk3 = require("@wq-hook/volcano-sdk");
|
|
449
690
|
|
|
450
691
|
// src/tts/TextSplitter.ts
|
|
451
692
|
function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
@@ -515,338 +756,217 @@ function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
|
515
756
|
return segments;
|
|
516
757
|
}
|
|
517
758
|
|
|
518
|
-
// src/tts/
|
|
519
|
-
var NoopMetricsCollector = class {
|
|
520
|
-
record(_metric) {
|
|
521
|
-
}
|
|
522
|
-
};
|
|
523
|
-
|
|
524
|
-
// src/tts/useMessageTTS.ts
|
|
759
|
+
// src/tts/StreamPlaybackManager.ts
|
|
525
760
|
var WS_URL = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
|
|
526
|
-
var activeInstances = /* @__PURE__ */ new Map();
|
|
527
761
|
/**
 * Appends `params` to `url` as a query string.
 *
 * Only the object's own enumerable properties are used. Both keys and values
 * are percent-encoded with `encodeURIComponent` (keys were previously written
 * verbatim, so a key containing `&`, `=` or `#` would corrupt the query).
 *
 * @param {string} url - Base URL; it is used as-is and a `?` is always appended.
 * @param {object} params - Key/value pairs to serialise.
 * @returns {string} `url` followed by `?` and the `&`-joined pairs.
 */
function buildFullUrl2(url, params) {
  const pairs = Object.entries(params).map(
    ([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`
  );
  return `${url}?${pairs.join("&")}`;
}
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
772
|
+
var PlaybackSession = class {
|
|
773
|
+
// 标记是否正在停止,用于区分 stop() 和 pause()
|
|
774
|
+
constructor(id, config) {
|
|
775
|
+
this.listeners = /* @__PURE__ */ new Set();
|
|
776
|
+
this.audioContext = null;
|
|
777
|
+
this.analyser = null;
|
|
778
|
+
this.source = null;
|
|
779
|
+
this.audioUrl = null;
|
|
780
|
+
// TTS Resources
|
|
781
|
+
this.client = null;
|
|
782
|
+
this.splitter = null;
|
|
783
|
+
// Internal State
|
|
784
|
+
this.segmentQueue = [];
|
|
785
|
+
this.isSending = false;
|
|
786
|
+
this.isSessionStarting = false;
|
|
787
|
+
this.streamText = "";
|
|
788
|
+
this.sessionAudioBuffers = [];
|
|
789
|
+
this.isStreamFinished = false;
|
|
790
|
+
this.isSessionFinished = false;
|
|
791
|
+
this.resolveAllSegmentsSent = null;
|
|
792
|
+
this.animId = null;
|
|
793
|
+
this.lastVisUpdate = 0;
|
|
794
|
+
// Blob URL 管理状态
|
|
795
|
+
this.pausedTime = 0;
|
|
796
|
+
// 记录暂停时的播放位置
|
|
797
|
+
this.cachedAudioData = null;
|
|
798
|
+
// 缓存音频数据,用于恢复时重新创建 Blob URL
|
|
799
|
+
this.isStopping = false;
|
|
800
|
+
this.id = id;
|
|
801
|
+
this.config = config;
|
|
802
|
+
this.state = {
|
|
803
|
+
isPlaying: false,
|
|
804
|
+
isPaused: false,
|
|
805
|
+
isSynthesizing: false,
|
|
806
|
+
progress: 0,
|
|
807
|
+
visualizationData: {
|
|
808
|
+
frequencyData: new Uint8Array(0),
|
|
809
|
+
timeDomainData: new Uint8Array(0)
|
|
810
|
+
},
|
|
811
|
+
error: null,
|
|
812
|
+
isConnected: false,
|
|
813
|
+
isSessionStarted: false,
|
|
814
|
+
isStreamFinished: false
|
|
815
|
+
};
|
|
816
|
+
this.audio = new Audio();
|
|
817
|
+
this.audio.crossOrigin = "anonymous";
|
|
818
|
+
this.setupAudioListeners();
|
|
819
|
+
}
|
|
820
|
+
/**
|
|
821
|
+
* 初始化 AudioContext(用于可视化)
|
|
822
|
+
*/
|
|
823
|
+
initAudioContext() {
|
|
824
|
+
if (!this.audioContext) {
|
|
588
825
|
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
589
|
-
|
|
826
|
+
this.audioContext = new AudioContextClass();
|
|
590
827
|
}
|
|
591
|
-
if (
|
|
592
|
-
|
|
828
|
+
if (this.audioContext.state === "suspended") {
|
|
829
|
+
this.audioContext.resume();
|
|
593
830
|
}
|
|
594
|
-
if (!
|
|
595
|
-
|
|
596
|
-
|
|
831
|
+
if (!this.analyser && this.audioContext) {
|
|
832
|
+
this.analyser = this.audioContext.createAnalyser();
|
|
833
|
+
this.analyser.fftSize = this.config.visualization?.fftSize || 256;
|
|
597
834
|
}
|
|
598
|
-
if (!
|
|
835
|
+
if (!this.source && this.audioContext && this.analyser) {
|
|
599
836
|
try {
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
);
|
|
603
|
-
sourceRef.current.connect(analyserRef.current);
|
|
604
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
837
|
+
this.source = this.audioContext.createMediaElementSource(this.audio);
|
|
838
|
+
this.source.connect(this.analyser);
|
|
839
|
+
this.analyser.connect(this.audioContext.destination);
|
|
605
840
|
} catch (e) {
|
|
606
841
|
}
|
|
607
842
|
}
|
|
608
|
-
}
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
843
|
+
}
|
|
844
|
+
setupAudioListeners() {
|
|
845
|
+
this.audio.onplay = () => {
|
|
846
|
+
this.updateState({ isPlaying: true, isPaused: false });
|
|
847
|
+
this.config.onPlayStart?.();
|
|
848
|
+
this.initAudioContext();
|
|
849
|
+
this.startVisualizationLoop();
|
|
850
|
+
};
|
|
851
|
+
this.audio.onpause = () => {
|
|
852
|
+
if (this.isStopping) return;
|
|
853
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
854
|
+
this.config.onPlayPause?.();
|
|
855
|
+
};
|
|
856
|
+
this.audio.onended = () => {
|
|
857
|
+
this.updateState({
|
|
858
|
+
isPlaying: false,
|
|
859
|
+
isPaused: false,
|
|
860
|
+
isSynthesizing: false,
|
|
861
|
+
progress: 0,
|
|
862
|
+
visualizationData: {
|
|
863
|
+
frequencyData: new Uint8Array(0),
|
|
864
|
+
timeDomainData: new Uint8Array(0)
|
|
865
|
+
}
|
|
866
|
+
});
|
|
867
|
+
this.config.onPlayEnd?.();
|
|
868
|
+
this.releaseBlobUrl();
|
|
869
|
+
this.pausedTime = 0;
|
|
870
|
+
this.stopVisualizationLoop();
|
|
871
|
+
this.config.onSessionEnd?.(this.id);
|
|
872
|
+
};
|
|
873
|
+
this.audio.onerror = async (e) => {
|
|
874
|
+
const msg = this.audio.error?.message || "Audio playback error";
|
|
875
|
+
if (msg.includes("Empty src") || msg.includes("empty src")) {
|
|
876
|
+
console.log("[PlaybackSession] Ignoring empty src error during transition");
|
|
877
|
+
return;
|
|
628
878
|
}
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
onPlayPause?.();
|
|
658
|
-
}, [onPlayPause]);
|
|
659
|
-
const resume = (0, import_react3.useCallback)(() => {
|
|
660
|
-
stopOthers();
|
|
661
|
-
if (isFallbackRef.current) {
|
|
662
|
-
window.speechSynthesis.resume();
|
|
663
|
-
} else if (audioRef.current) {
|
|
664
|
-
audioRef.current.play();
|
|
665
|
-
}
|
|
666
|
-
setIsPaused(false);
|
|
667
|
-
setIsPlaying(true);
|
|
668
|
-
onPlayResume?.();
|
|
669
|
-
activeInstances.set(instanceId, { pause });
|
|
670
|
-
}, [stopOthers, instanceId, pause, onPlayResume]);
|
|
671
|
-
const togglePlay = (0, import_react3.useCallback)(() => {
|
|
672
|
-
if (isPlaying) {
|
|
673
|
-
pause();
|
|
674
|
-
} else {
|
|
675
|
-
resume();
|
|
676
|
-
}
|
|
677
|
-
}, [isPlaying, pause, resume]);
|
|
678
|
-
const playFallback = (0, import_react3.useCallback)(
|
|
679
|
-
(text) => {
|
|
680
|
-
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
681
|
-
isFallbackRef.current = true;
|
|
682
|
-
if (clientRef.current) {
|
|
683
|
-
clientRef.current.close();
|
|
684
|
-
clientRef.current = null;
|
|
879
|
+
console.error("[PlaybackSession] Audio error:", msg);
|
|
880
|
+
const isBlobUrlExpired = msg.includes("ERR_FILE_NOT_FOUND") || msg.includes("PIPELINE_ERROR_READ") || msg.includes("MEDIA_ELEMENT_ERROR") || this.audio.error?.code === MediaError.MEDIA_ERR_NETWORK || this.audio.error?.code === MediaError.MEDIA_ERR_SRC_NOT_SUPPORTED;
|
|
881
|
+
if (isBlobUrlExpired && this.cachedAudioData) {
|
|
882
|
+
console.warn(
|
|
883
|
+
"[PlaybackSession] Blob URL expired, attempting to recreate from cache"
|
|
884
|
+
);
|
|
885
|
+
this.releaseBlobUrl();
|
|
886
|
+
const blob = new Blob(this.cachedAudioData, { type: "audio/mpeg" });
|
|
887
|
+
this.audioUrl = URL.createObjectURL(blob);
|
|
888
|
+
this.audio.src = this.audioUrl;
|
|
889
|
+
const resumeTime = this.pausedTime || 0;
|
|
890
|
+
try {
|
|
891
|
+
await this.audio.play();
|
|
892
|
+
if (resumeTime > 0) {
|
|
893
|
+
this.audio.currentTime = resumeTime;
|
|
894
|
+
}
|
|
895
|
+
return;
|
|
896
|
+
} catch (playErr) {
|
|
897
|
+
console.error("[PlaybackSession] Failed to replay from cache:", playErr);
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
this.updateState({ error: msg });
|
|
901
|
+
this.config.onError?.(new Error(msg));
|
|
902
|
+
};
|
|
903
|
+
this.audio.ontimeupdate = () => {
|
|
904
|
+
let duration = this.audio.duration;
|
|
905
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
906
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
685
907
|
}
|
|
686
|
-
if (
|
|
687
|
-
|
|
688
|
-
|
|
908
|
+
if (isFinite(duration) && duration > 0) {
|
|
909
|
+
const progress = this.audio.currentTime / duration * 100;
|
|
910
|
+
this.updateState({ progress });
|
|
689
911
|
}
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
async (text, targetVoice) => {
|
|
721
|
-
stop();
|
|
722
|
-
stopOthers();
|
|
723
|
-
setErrorState(null);
|
|
724
|
-
setIsSynthesizing(true);
|
|
725
|
-
setProgress(0);
|
|
726
|
-
audioBuffersRef.current = [];
|
|
727
|
-
isFallbackRef.current = false;
|
|
728
|
-
const speed = audioParams?.speech_rate || 0;
|
|
729
|
-
const voice = targetVoice;
|
|
730
|
-
const cacheKey = TTSCache.generateKey(text, voice, speed);
|
|
731
|
-
cacheKeyRef.current = cacheKey;
|
|
732
|
-
const startTime = Date.now();
|
|
733
|
-
metricsCollector.record({
|
|
734
|
-
name: "tts_request",
|
|
735
|
-
labels: { voice, speed, text_length: text.length },
|
|
736
|
-
value: 1,
|
|
737
|
-
timestamp: startTime
|
|
738
|
-
});
|
|
739
|
-
try {
|
|
740
|
-
const cachedData = await TTSCache.get(cacheKey);
|
|
741
|
-
const audio = new Audio();
|
|
742
|
-
audio.crossOrigin = "anonymous";
|
|
743
|
-
audioRef.current = audio;
|
|
744
|
-
audio.onplay = () => {
|
|
745
|
-
setIsPlaying(true);
|
|
746
|
-
setIsPaused(false);
|
|
747
|
-
onPlayStart?.();
|
|
748
|
-
initAudioContext();
|
|
749
|
-
activeInstances.set(instanceId, { pause });
|
|
750
|
-
metricsCollector.record({
|
|
751
|
-
name: "tts_latency",
|
|
752
|
-
labels: { stage: "playback", voice, speed },
|
|
753
|
-
value: Date.now() - startTime,
|
|
754
|
-
timestamp: Date.now()
|
|
755
|
-
});
|
|
756
|
-
};
|
|
757
|
-
audio.onpause = () => {
|
|
758
|
-
if (!audio.ended) {
|
|
759
|
-
}
|
|
760
|
-
};
|
|
761
|
-
audio.onended = () => {
|
|
762
|
-
setIsPlaying(false);
|
|
763
|
-
setIsPaused(false);
|
|
764
|
-
onPlayEnd?.();
|
|
765
|
-
activeInstances.delete(instanceId);
|
|
766
|
-
};
|
|
767
|
-
audio.onerror = (e) => {
|
|
768
|
-
console.error("Audio playback error:", e, audio.error);
|
|
769
|
-
metricsCollector.record({
|
|
770
|
-
name: "tts_error",
|
|
771
|
-
labels: {
|
|
772
|
-
error_code: "playback_error",
|
|
773
|
-
voice,
|
|
774
|
-
detail: audio.error?.message || String(audio.error?.code)
|
|
775
|
-
},
|
|
776
|
-
value: 1,
|
|
777
|
-
timestamp: Date.now()
|
|
778
|
-
});
|
|
779
|
-
handleError(text, voice);
|
|
780
|
-
};
|
|
781
|
-
audio.ontimeupdate = () => {
|
|
782
|
-
let duration = audio.duration;
|
|
783
|
-
if (!isFinite(duration)) {
|
|
784
|
-
if (audio.buffered.length > 0) {
|
|
785
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
786
|
-
}
|
|
787
|
-
}
|
|
788
|
-
if (isFinite(duration) && duration > 0) {
|
|
789
|
-
setProgress(audio.currentTime / duration * 100);
|
|
790
|
-
}
|
|
791
|
-
};
|
|
792
|
-
if (cachedData) {
|
|
793
|
-
const totalSize = cachedData.reduce(
|
|
794
|
-
(acc, buf) => acc + buf.byteLength,
|
|
795
|
-
0
|
|
796
|
-
);
|
|
797
|
-
metricsCollector.record({
|
|
798
|
-
name: "tts_cache_hit",
|
|
799
|
-
labels: { voice, speed },
|
|
800
|
-
value: 1,
|
|
801
|
-
timestamp: Date.now()
|
|
802
|
-
});
|
|
803
|
-
console.log(
|
|
804
|
-
JSON.stringify({
|
|
805
|
-
event: "tts_cache_hit",
|
|
806
|
-
cache_hit: true,
|
|
807
|
-
text_len: text.length,
|
|
808
|
-
voice,
|
|
809
|
-
speed,
|
|
810
|
-
data_size: totalSize
|
|
811
|
-
})
|
|
812
|
-
);
|
|
813
|
-
if (totalSize === 0) {
|
|
814
|
-
console.warn(
|
|
815
|
-
"[useMessageTTS] Cached data is empty, falling back to stream"
|
|
816
|
-
);
|
|
817
|
-
} else {
|
|
818
|
-
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
819
|
-
const url2 = URL.createObjectURL(blob);
|
|
820
|
-
audioUrlRef.current = url2;
|
|
821
|
-
audio.src = url2;
|
|
822
|
-
setIsSynthesizing(false);
|
|
823
|
-
if (autoPlay) {
|
|
824
|
-
try {
|
|
825
|
-
await audio.play();
|
|
826
|
-
} catch (err) {
|
|
827
|
-
console.warn("AutoPlay blocked", err);
|
|
828
|
-
}
|
|
829
|
-
}
|
|
830
|
-
return;
|
|
831
|
-
}
|
|
912
|
+
};
|
|
913
|
+
}
|
|
914
|
+
/**
|
|
915
|
+
* 建立 WebSocket 连接
|
|
916
|
+
*/
|
|
917
|
+
async connect() {
|
|
918
|
+
if (this.state.isConnected) return;
|
|
919
|
+
this.updateState({
|
|
920
|
+
error: null,
|
|
921
|
+
progress: 0,
|
|
922
|
+
isSynthesizing: false,
|
|
923
|
+
isConnected: false,
|
|
924
|
+
isSessionStarted: false
|
|
925
|
+
});
|
|
926
|
+
this.streamText = "";
|
|
927
|
+
this.segmentQueue = [];
|
|
928
|
+
this.sessionAudioBuffers = [];
|
|
929
|
+
this.isStreamFinished = false;
|
|
930
|
+
this.isSessionFinished = false;
|
|
931
|
+
this.isSessionStarting = false;
|
|
932
|
+
if (this.client) {
|
|
933
|
+
this.client.close();
|
|
934
|
+
this.client = null;
|
|
935
|
+
}
|
|
936
|
+
this.splitter = new StreamingTextSplitter({
|
|
937
|
+
maxLength: this.config.maxSegmentLength || 150,
|
|
938
|
+
onSegmentComplete: (segment) => {
|
|
939
|
+
this.segmentQueue.push(segment);
|
|
940
|
+
if (this.state.isSessionStarted) {
|
|
941
|
+
this.processQueue();
|
|
832
942
|
}
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
943
|
+
}
|
|
944
|
+
});
|
|
945
|
+
this.client = (0, import_tts2.WebsocketMSE)({ autoStartSession: false });
|
|
946
|
+
const { ttsConfig, audioParams } = this.config;
|
|
947
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
948
|
+
const startTime = Date.now();
|
|
949
|
+
this.config.metricsCollector?.record({
|
|
950
|
+
name: "tts_request",
|
|
951
|
+
labels: { voice, text_length: 0 },
|
|
952
|
+
value: 1,
|
|
953
|
+
timestamp: startTime
|
|
954
|
+
});
|
|
955
|
+
return new Promise((resolve, reject) => {
|
|
956
|
+
const timeoutId = setTimeout(() => {
|
|
957
|
+
const err = new Error("WebSocket connection timeout (10s)");
|
|
958
|
+
this.updateState({ error: err.message });
|
|
959
|
+
reject(err);
|
|
960
|
+
}, 1e4);
|
|
961
|
+
try {
|
|
962
|
+
const url = this.client.start({
|
|
841
963
|
url: buildFullUrl2(WS_URL, {
|
|
842
964
|
api_access_key: `Jwt; ${ttsConfig.token}`,
|
|
843
965
|
api_app_key: ttsConfig.appid,
|
|
844
966
|
api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
|
|
845
967
|
}),
|
|
846
968
|
config: {
|
|
847
|
-
user: {
|
|
848
|
-
uid: `req-${Date.now()}`
|
|
849
|
-
},
|
|
969
|
+
user: { uid: `req-${Date.now()}` },
|
|
850
970
|
namespace: ttsConfig.namespace || "BidirectionalTTS",
|
|
851
971
|
req_params: {
|
|
852
972
|
speaker: voice,
|
|
@@ -861,455 +981,722 @@ function useMessageTTS({
|
|
|
861
981
|
enable_language_detector: true,
|
|
862
982
|
disable_markdown_filter: true,
|
|
863
983
|
enable_latex_tn: true
|
|
864
|
-
// max_length_to_filter_parenthesis: 100,
|
|
865
984
|
})
|
|
866
985
|
}
|
|
867
986
|
},
|
|
987
|
+
onStart: () => {
|
|
988
|
+
this.updateState({ isConnected: true });
|
|
989
|
+
},
|
|
990
|
+
onConnectionReady: () => {
|
|
991
|
+
clearTimeout(timeoutId);
|
|
992
|
+
resolve();
|
|
993
|
+
},
|
|
868
994
|
onSessionStarted: () => {
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
995
|
+
this.updateState({ isSessionStarted: true });
|
|
996
|
+
this.isSessionStarting = false;
|
|
997
|
+
if (this.segmentQueue.length > 0) {
|
|
998
|
+
this.processQueue();
|
|
999
|
+
}
|
|
873
1000
|
},
|
|
874
1001
|
onMessage: (data) => {
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
);
|
|
1002
|
+
this.updateState({ isSynthesizing: true });
|
|
1003
|
+
if (this.sessionAudioBuffers.length === 0) {
|
|
1004
|
+
this.config.metricsCollector?.record({
|
|
1005
|
+
name: "tts_latency",
|
|
1006
|
+
labels: { stage: "first_packet", voice },
|
|
1007
|
+
value: Date.now() - startTime,
|
|
1008
|
+
timestamp: Date.now()
|
|
1009
|
+
});
|
|
883
1010
|
}
|
|
884
1011
|
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
885
|
-
|
|
1012
|
+
this.sessionAudioBuffers.push(buffer);
|
|
886
1013
|
},
|
|
887
1014
|
onSessionFinished: () => {
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
1015
|
+
this.updateState({
|
|
1016
|
+
isSynthesizing: false,
|
|
1017
|
+
isSessionStarted: false
|
|
1018
|
+
});
|
|
1019
|
+
if (this.sessionAudioBuffers.length > 0) {
|
|
1020
|
+
this.cachedAudioData = [...this.sessionAudioBuffers];
|
|
891
1021
|
}
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
text_len: text.length,
|
|
897
|
-
duration_ms: Date.now() - startTime,
|
|
1022
|
+
if (this.sessionAudioBuffers.length > 0 && this.streamText) {
|
|
1023
|
+
const speed = audioParams?.speech_rate || 0;
|
|
1024
|
+
const cacheKey = TTSCache.generateKey(
|
|
1025
|
+
this.streamText,
|
|
898
1026
|
voice,
|
|
899
1027
|
speed
|
|
900
|
-
|
|
901
|
-
|
|
1028
|
+
);
|
|
1029
|
+
TTSCache.set(cacheKey, [...this.sessionAudioBuffers]);
|
|
1030
|
+
}
|
|
1031
|
+
this.config.metricsCollector?.record({
|
|
1032
|
+
name: "tts_synthesis_finished",
|
|
1033
|
+
labels: { voice, text_length: this.streamText.length },
|
|
1034
|
+
value: Date.now() - startTime,
|
|
1035
|
+
timestamp: Date.now()
|
|
1036
|
+
});
|
|
902
1037
|
},
|
|
903
1038
|
onError: (err) => {
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
1039
|
+
if (!this.state.isConnected) {
|
|
1040
|
+
clearTimeout(timeoutId);
|
|
1041
|
+
reject(new Error(err.msg || "TTS error"));
|
|
1042
|
+
}
|
|
1043
|
+
console.error("[PlaybackSession] TTS error:", err);
|
|
1044
|
+
this.updateState({
|
|
1045
|
+
error: err.msg || "TTS error",
|
|
1046
|
+
isSynthesizing: false
|
|
910
1047
|
});
|
|
911
|
-
|
|
912
|
-
|
|
1048
|
+
this.config.onError?.(new Error(err.msg || "TTS error"));
|
|
1049
|
+
},
|
|
1050
|
+
onWSError: (err) => {
|
|
1051
|
+
if (!this.state.isConnected) {
|
|
1052
|
+
clearTimeout(timeoutId);
|
|
1053
|
+
reject(err instanceof Error ? err : new Error("WebSocket error"));
|
|
1054
|
+
}
|
|
913
1055
|
}
|
|
914
1056
|
});
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
1057
|
+
if (this.audioUrl) {
|
|
1058
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1059
|
+
}
|
|
1060
|
+
this.audioUrl = url;
|
|
1061
|
+
this.audio.src = url;
|
|
1062
|
+
if (this.config.autoPlay !== false) {
|
|
1063
|
+
this.audio.play().catch(
|
|
1064
|
+
(e) => console.warn("[PlaybackSession] Autoplay blocked:", e)
|
|
1065
|
+
);
|
|
923
1066
|
}
|
|
924
1067
|
} catch (err) {
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
timestamp: Date.now()
|
|
931
|
-
});
|
|
932
|
-
handleError(text, voice);
|
|
933
|
-
}
|
|
934
|
-
},
|
|
935
|
-
[
|
|
936
|
-
ttsConfig,
|
|
937
|
-
audioParams,
|
|
938
|
-
autoPlay,
|
|
939
|
-
stop,
|
|
940
|
-
stopOthers,
|
|
941
|
-
instanceId,
|
|
942
|
-
onPlayStart,
|
|
943
|
-
onPlayEnd,
|
|
944
|
-
initAudioContext,
|
|
945
|
-
pause,
|
|
946
|
-
fallbackVoice,
|
|
947
|
-
metricsCollector
|
|
948
|
-
]
|
|
949
|
-
);
|
|
950
|
-
const handleError = (0, import_react3.useCallback)(
|
|
951
|
-
(text, failedVoice) => {
|
|
952
|
-
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
953
|
-
console.warn(
|
|
954
|
-
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
1068
|
+
clearTimeout(timeoutId);
|
|
1069
|
+
console.error("[PlaybackSession] Connect error:", err);
|
|
1070
|
+
this.updateState({ error: String(err) });
|
|
1071
|
+
this.config.onError?.(
|
|
1072
|
+
err instanceof Error ? err : new Error(String(err))
|
|
955
1073
|
);
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
1074
|
+
reject(err);
|
|
1075
|
+
}
|
|
1076
|
+
});
|
|
1077
|
+
}
|
|
1078
|
+
/**
|
|
1079
|
+
* 发送流式文本
|
|
1080
|
+
*/
|
|
1081
|
+
handleStreamChunk(chunk) {
|
|
1082
|
+
if (!chunk) return;
|
|
1083
|
+
this.streamText += chunk;
|
|
1084
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting && this.client && this.state.isConnected && !this.isSessionFinished) {
|
|
1085
|
+
this.isSessionStarting = true;
|
|
1086
|
+
this.client.startSession();
|
|
1087
|
+
}
|
|
1088
|
+
this.splitter?.onChunk(chunk);
|
|
1089
|
+
if (this.state.isSessionStarted) {
|
|
1090
|
+
this.processQueue();
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
/**
|
|
1094
|
+
* 结束流式输入
|
|
1095
|
+
*/
|
|
1096
|
+
async finishStream() {
|
|
1097
|
+
this.isStreamFinished = true;
|
|
1098
|
+
this.updateState({ isStreamFinished: true });
|
|
1099
|
+
this.splitter?.complete();
|
|
1100
|
+
if (this.state.isSessionStarted) {
|
|
1101
|
+
this.processQueue();
|
|
1102
|
+
}
|
|
1103
|
+
if (this.segmentQueue.length > 0 || this.isSending) {
|
|
1104
|
+
await new Promise((resolve) => {
|
|
1105
|
+
this.resolveAllSegmentsSent = resolve;
|
|
1106
|
+
});
|
|
1107
|
+
} else if (this.client && this.state.isSessionStarted && !this.isSessionFinished) {
|
|
1108
|
+
this.isSessionFinished = true;
|
|
1109
|
+
this.client.finishSession();
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1112
|
+
/**
|
|
1113
|
+
* 处理非流式播放(直接播放整段文本)
|
|
1114
|
+
*/
|
|
1115
|
+
async play(text) {
|
|
1116
|
+
const formattedText = import_volcano_sdk3.MarkdownFormatter.format(text).replace(
|
|
1117
|
+
(0, import_emoji_regex3.default)(),
|
|
1118
|
+
""
|
|
1119
|
+
);
|
|
1120
|
+
const { audioParams } = this.config;
|
|
1121
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
1122
|
+
const speed = audioParams?.speech_rate || 0;
|
|
1123
|
+
const cacheKey = TTSCache.generateKey(formattedText, voice, speed);
|
|
1124
|
+
const cachedData = await TTSCache.get(cacheKey);
|
|
1125
|
+
if (cachedData && cachedData.length > 0) {
|
|
1126
|
+
this.cachedAudioData = cachedData;
|
|
1127
|
+
this.releaseBlobUrl();
|
|
1128
|
+
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
1129
|
+
this.audioUrl = URL.createObjectURL(blob);
|
|
1130
|
+
this.audio.src = this.audioUrl;
|
|
1131
|
+
this.pausedTime = 0;
|
|
1132
|
+
this.updateState({ isSynthesizing: false });
|
|
1133
|
+
if (this.config.autoPlay !== false) {
|
|
1134
|
+
try {
|
|
1135
|
+
await this.audio.play();
|
|
1136
|
+
} catch (e) {
|
|
1137
|
+
console.warn("Autoplay blocked", e);
|
|
963
1138
|
}
|
|
964
|
-
executeTTS(text, fallbackVoice);
|
|
965
|
-
} else {
|
|
966
|
-
playFallback(text);
|
|
967
1139
|
}
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
(
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1140
|
+
return;
|
|
1141
|
+
}
|
|
1142
|
+
await this.connect();
|
|
1143
|
+
this.streamText = formattedText;
|
|
1144
|
+
const segments = splitTextByDelimiters(formattedText);
|
|
1145
|
+
if (this.state.isConnected) {
|
|
1146
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting) {
|
|
1147
|
+
this.isSessionStarting = true;
|
|
1148
|
+
this.client?.startSession();
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
segments.forEach((seg, idx) => {
|
|
1152
|
+
this.segmentQueue.push({
|
|
1153
|
+
index: idx,
|
|
1154
|
+
content: seg.content,
|
|
1155
|
+
length: seg.content.length,
|
|
1156
|
+
sent: false
|
|
1157
|
+
});
|
|
1158
|
+
});
|
|
1159
|
+
if (this.state.isSessionStarted) {
|
|
1160
|
+
this.processQueue();
|
|
1161
|
+
}
|
|
1162
|
+
await this.finishStream();
|
|
1163
|
+
}
|
|
1164
|
+
processQueue() {
|
|
1165
|
+
if (!this.client || !this.state.isSessionStarted || this.isSending || this.isSessionFinished) {
|
|
1166
|
+
return;
|
|
1167
|
+
}
|
|
1168
|
+
if (this.segmentQueue.length === 0) {
|
|
1169
|
+
if (this.isStreamFinished && !this.isSessionFinished) {
|
|
1170
|
+
this.isSessionFinished = true;
|
|
1171
|
+
this.client.finishSession();
|
|
1172
|
+
this.resolveAllSegmentsSent?.();
|
|
1173
|
+
}
|
|
1174
|
+
return;
|
|
1175
|
+
}
|
|
1176
|
+
this.isSending = true;
|
|
1177
|
+
const segment = this.segmentQueue.shift();
|
|
1178
|
+
this.client.sendText(segment.content);
|
|
1179
|
+
segment.sent = true;
|
|
1180
|
+
this.isSending = false;
|
|
1181
|
+
setTimeout(() => this.processQueue(), 0);
|
|
1182
|
+
}
|
|
1183
|
+
pause() {
|
|
1184
|
+
if (this.isStopping) return;
|
|
1185
|
+
this.pausedTime = this.audio.currentTime;
|
|
1186
|
+
this.audio.pause();
|
|
1187
|
+
this.releaseBlobUrl();
|
|
1188
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
1189
|
+
}
|
|
1190
|
+
async resume() {
|
|
1191
|
+
if (!this.audioUrl && this.cachedAudioData) {
|
|
1192
|
+
const blob = new Blob(this.cachedAudioData, { type: "audio/mpeg" });
|
|
1193
|
+
this.audioUrl = URL.createObjectURL(blob);
|
|
1194
|
+
this.audio.src = this.audioUrl;
|
|
1195
|
+
await new Promise((resolve, reject) => {
|
|
1196
|
+
const onLoaded = () => {
|
|
1197
|
+
resolve();
|
|
1198
|
+
this.audio.removeEventListener("loadedmetadata", onLoaded);
|
|
1199
|
+
this.audio.removeEventListener("error", onError);
|
|
1200
|
+
};
|
|
1201
|
+
const onError = () => {
|
|
1202
|
+
reject(new Error("Failed to load audio"));
|
|
1203
|
+
this.audio.removeEventListener("loadedmetadata", onLoaded);
|
|
1204
|
+
this.audio.removeEventListener("error", onError);
|
|
1205
|
+
};
|
|
1206
|
+
this.audio.addEventListener("loadedmetadata", onLoaded);
|
|
1207
|
+
this.audio.addEventListener("error", onError);
|
|
1208
|
+
setTimeout(() => {
|
|
1209
|
+
this.audio.removeEventListener("loadedmetadata", onLoaded);
|
|
1210
|
+
this.audio.removeEventListener("error", onError);
|
|
1211
|
+
resolve();
|
|
1212
|
+
}, 3e3);
|
|
1213
|
+
});
|
|
1214
|
+
this.audio.currentTime = this.pausedTime;
|
|
1215
|
+
}
|
|
1216
|
+
await this.audio.play();
|
|
1217
|
+
this.updateState({ isPaused: false, isPlaying: true });
|
|
1218
|
+
}
|
|
1219
|
+
stop() {
|
|
1220
|
+
this.isStopping = true;
|
|
1221
|
+
if (this.client) {
|
|
1222
|
+
this.client.close();
|
|
1223
|
+
this.client = null;
|
|
1224
|
+
}
|
|
1225
|
+
this.audio.pause();
|
|
1226
|
+
this.audio.currentTime = 0;
|
|
1227
|
+
this.releaseBlobUrl();
|
|
1228
|
+
this.cachedAudioData = null;
|
|
1229
|
+
this.pausedTime = 0;
|
|
1230
|
+
this.stopVisualizationLoop();
|
|
1231
|
+
this.audioContext?.close();
|
|
1232
|
+
this.audioContext = null;
|
|
1233
|
+
this.updateState({
|
|
1234
|
+
isPlaying: false,
|
|
1235
|
+
isPaused: false,
|
|
1236
|
+
isSynthesizing: false,
|
|
1237
|
+
progress: 0,
|
|
1238
|
+
isConnected: false,
|
|
1239
|
+
isSessionStarted: false,
|
|
1240
|
+
// 清除可视化数据
|
|
1241
|
+
visualizationData: {
|
|
1242
|
+
frequencyData: new Uint8Array(0),
|
|
1243
|
+
timeDomainData: new Uint8Array(0)
|
|
1244
|
+
}
|
|
1245
|
+
});
|
|
1246
|
+
this.isStopping = false;
|
|
1247
|
+
}
|
|
1248
|
+
seek(percentage) {
|
|
1249
|
+
let duration = this.audio.duration;
|
|
1250
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
1251
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
1252
|
+
}
|
|
1253
|
+
if (isFinite(duration) && duration > 0) {
|
|
1254
|
+
const time = percentage / 100 * duration;
|
|
1255
|
+
if (isFinite(time)) {
|
|
1256
|
+
this.audio.currentTime = time;
|
|
1257
|
+
this.updateState({ progress: percentage });
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
updateState(partial) {
|
|
1262
|
+
this.state = { ...this.state, ...partial };
|
|
1263
|
+
this.notifyListeners();
|
|
1264
|
+
}
|
|
1265
|
+
subscribe(listener) {
|
|
1266
|
+
this.listeners.add(listener);
|
|
1267
|
+
listener(this.state);
|
|
1268
|
+
return () => this.listeners.delete(listener);
|
|
1269
|
+
}
|
|
1270
|
+
notifyListeners() {
|
|
1271
|
+
this.listeners.forEach((l) => l(this.state));
|
|
1272
|
+
}
|
|
1273
|
+
// Visualization
|
|
1274
|
+
getFrequencyData() {
|
|
1275
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1276
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1277
|
+
this.analyser.getByteFrequencyData(data);
|
|
1278
|
+
return data;
|
|
1279
|
+
}
|
|
1280
|
+
getTimeDomainData() {
|
|
1281
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1282
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1283
|
+
this.analyser.getByteTimeDomainData(data);
|
|
1284
|
+
return data;
|
|
1285
|
+
}
|
|
1286
|
+
startVisualizationLoop() {
|
|
1287
|
+
if (!this.config.visualization?.enabled) return;
|
|
995
1288
|
const update = (timestamp) => {
|
|
996
|
-
if (isPlaying && !isPaused) {
|
|
997
|
-
if (timestamp -
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1289
|
+
if (this.state.isPlaying && !this.state.isPaused) {
|
|
1290
|
+
if (timestamp - this.lastVisUpdate >= (this.config.visualization?.refreshInterval || 0)) {
|
|
1291
|
+
this.updateState({
|
|
1292
|
+
visualizationData: {
|
|
1293
|
+
frequencyData: this.getFrequencyData(),
|
|
1294
|
+
timeDomainData: this.getTimeDomainData()
|
|
1295
|
+
}
|
|
1001
1296
|
});
|
|
1002
|
-
|
|
1297
|
+
this.lastVisUpdate = timestamp;
|
|
1003
1298
|
}
|
|
1004
|
-
animId = requestAnimationFrame(update);
|
|
1299
|
+
this.animId = requestAnimationFrame(update);
|
|
1005
1300
|
}
|
|
1006
1301
|
};
|
|
1007
|
-
|
|
1008
|
-
|
|
1302
|
+
this.animId = requestAnimationFrame(update);
|
|
1303
|
+
}
|
|
1304
|
+
stopVisualizationLoop() {
|
|
1305
|
+
if (this.animId) {
|
|
1306
|
+
cancelAnimationFrame(this.animId);
|
|
1307
|
+
this.animId = null;
|
|
1009
1308
|
}
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1309
|
+
}
|
|
1310
|
+
/**
|
|
1311
|
+
* 释放 Blob URL 资源
|
|
1312
|
+
* 在暂停、停止、播放完毕时调用,避免 Blob URL 长期占用内存和过期问题
|
|
1313
|
+
*/
|
|
1314
|
+
releaseBlobUrl() {
|
|
1315
|
+
if (this.audioUrl) {
|
|
1316
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1317
|
+
this.audioUrl = null;
|
|
1318
|
+
}
|
|
1319
|
+
this.audio.src = "";
|
|
1320
|
+
this.audio.load();
|
|
1321
|
+
}
|
|
1322
|
+
};
|
|
1323
|
+
var StreamPlaybackManagerImpl = class {
|
|
1324
|
+
constructor() {
|
|
1325
|
+
this.sessions = /* @__PURE__ */ new Map();
|
|
1326
|
+
this.activeStreamId = null;
|
|
1327
|
+
}
|
|
1328
|
+
/**
|
|
1329
|
+
* 创建新的播放会话
|
|
1330
|
+
*/
|
|
1331
|
+
createSession(id, config) {
|
|
1332
|
+
if (this.activeStreamId && this.activeStreamId !== id) {
|
|
1333
|
+
const activeSession = this.sessions.get(this.activeStreamId);
|
|
1334
|
+
if (activeSession) {
|
|
1335
|
+
const isPlaying = activeSession.state.isPlaying;
|
|
1336
|
+
const isPaused = activeSession.state.isPaused;
|
|
1337
|
+
console.log(`[StreamPlaybackManager] Checking active session ${this.activeStreamId}: isPlaying=${isPlaying}, isPaused=${isPaused}`);
|
|
1338
|
+
if (isPlaying || isPaused) {
|
|
1339
|
+
console.log(`[StreamPlaybackManager] Pausing active session ${this.activeStreamId}`);
|
|
1340
|
+
this.pause(this.activeStreamId);
|
|
1341
|
+
} else {
|
|
1342
|
+
console.log(`[StreamPlaybackManager] Active session ${this.activeStreamId} is not playing/paused, skipping pause`);
|
|
1035
1343
|
}
|
|
1036
1344
|
}
|
|
1037
1345
|
}
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
progress,
|
|
1051
|
-
getFrequencyData,
|
|
1052
|
-
getTimeDomainData,
|
|
1053
|
-
visualizationData
|
|
1054
|
-
};
|
|
1055
|
-
}
|
|
1056
|
-
|
|
1057
|
-
// src/tts/useStreamTTS.ts
|
|
1058
|
-
var import_tts3 = require("@wq-hook/volcano-sdk/tts");
|
|
1059
|
-
var import_react4 = require("react");
|
|
1060
|
-
|
|
1061
|
-
// src/tts/StreamingTextSplitter.ts
|
|
1062
|
-
var import_volcano_sdk3 = require("@wq-hook/volcano-sdk");
|
|
1063
|
-
var import_emoji_regex3 = __toESM(require("emoji-regex"));
|
|
1064
|
-
var StreamingTextSplitter = class {
|
|
1065
|
-
constructor(options = {}) {
|
|
1066
|
-
/** 当前缓冲区 */
|
|
1067
|
-
this.buffer = "";
|
|
1068
|
-
/** 分段索引计数器 */
|
|
1069
|
-
this.segmentIndex = 0;
|
|
1070
|
-
/** 已完成的分段列表 */
|
|
1071
|
-
this.segments = [];
|
|
1072
|
-
/** 是否已完成 */
|
|
1073
|
-
this.isCompleted = false;
|
|
1074
|
-
this.maxLength = options.maxLength || 150;
|
|
1075
|
-
this.minLength = options.minLength || 10;
|
|
1076
|
-
this.onSegmentComplete = options.onSegmentComplete;
|
|
1077
|
-
this.onAllComplete = options.onAllComplete;
|
|
1346
|
+
const session = new PlaybackSession(id, {
|
|
1347
|
+
...config,
|
|
1348
|
+
onSessionEnd: (sessionId) => {
|
|
1349
|
+
if (this.activeStreamId === sessionId) {
|
|
1350
|
+
this.activeStreamId = null;
|
|
1351
|
+
}
|
|
1352
|
+
config.onSessionEnd?.(sessionId);
|
|
1353
|
+
}
|
|
1354
|
+
});
|
|
1355
|
+
this.sessions.set(id, session);
|
|
1356
|
+
this.activeStreamId = id;
|
|
1357
|
+
return session;
|
|
1078
1358
|
}
|
|
1079
1359
|
/**
|
|
1080
|
-
*
|
|
1081
|
-
* @param chunk - 文本块
|
|
1360
|
+
* 获取会话
|
|
1082
1361
|
*/
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
this.buffer += chunk;
|
|
1086
|
-
if (this.detectBoundary(chunk)) {
|
|
1087
|
-
const newlineIndex = this.buffer.indexOf("\n");
|
|
1088
|
-
if (newlineIndex !== -1) {
|
|
1089
|
-
if (newlineIndex === 0) {
|
|
1090
|
-
this.buffer = this.buffer.substring(1);
|
|
1091
|
-
return;
|
|
1092
|
-
}
|
|
1093
|
-
const segmentBuffer = this.buffer.substring(0, newlineIndex);
|
|
1094
|
-
this.buffer = this.buffer.substring(newlineIndex + 1);
|
|
1095
|
-
this.flushSegmentWithBuffer(segmentBuffer);
|
|
1096
|
-
while (this.buffer.includes("\n")) {
|
|
1097
|
-
const nextNewlineIndex = this.buffer.indexOf("\n");
|
|
1098
|
-
if (nextNewlineIndex === 0) {
|
|
1099
|
-
this.buffer = this.buffer.substring(1);
|
|
1100
|
-
continue;
|
|
1101
|
-
}
|
|
1102
|
-
const nextSegmentBuffer = this.buffer.substring(0, nextNewlineIndex);
|
|
1103
|
-
this.buffer = this.buffer.substring(nextNewlineIndex + 1);
|
|
1104
|
-
this.flushSegmentWithBuffer(nextSegmentBuffer);
|
|
1105
|
-
}
|
|
1106
|
-
}
|
|
1107
|
-
}
|
|
1362
|
+
getSession(id) {
|
|
1363
|
+
return this.sessions.get(id);
|
|
1108
1364
|
}
|
|
1109
1365
|
/**
|
|
1110
|
-
*
|
|
1111
|
-
* @param chunk - 最新接收的文本块
|
|
1112
|
-
* @returns 是否应该分段
|
|
1366
|
+
* 停止会话
|
|
1113
1367
|
*/
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1368
|
+
stop(id) {
|
|
1369
|
+
const session = this.sessions.get(id);
|
|
1370
|
+
if (session) {
|
|
1371
|
+
session.stop();
|
|
1372
|
+
this.sessions.delete(id);
|
|
1373
|
+
if (this.activeStreamId === id) {
|
|
1374
|
+
this.activeStreamId = null;
|
|
1118
1375
|
}
|
|
1119
|
-
return true;
|
|
1120
1376
|
}
|
|
1121
|
-
if (this.buffer.length >= this.maxLength) {
|
|
1122
|
-
this.forceSplitAtSentenceBoundary();
|
|
1123
|
-
return true;
|
|
1124
|
-
}
|
|
1125
|
-
return false;
|
|
1126
1377
|
}
|
|
1127
1378
|
/**
|
|
1128
|
-
*
|
|
1379
|
+
* 暂停会话
|
|
1129
1380
|
*/
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
const sentenceEnders = /[。?!]/g;
|
|
1133
|
-
let lastMatch = null;
|
|
1134
|
-
let match = null;
|
|
1135
|
-
while ((match = sentenceEnders.exec(content)) !== null) {
|
|
1136
|
-
lastMatch = match;
|
|
1137
|
-
}
|
|
1138
|
-
if (lastMatch && lastMatch.index > this.minLength) {
|
|
1139
|
-
const splitPoint = lastMatch.index + 1;
|
|
1140
|
-
const firstPart = content.substring(0, splitPoint);
|
|
1141
|
-
const secondPart = content.substring(splitPoint);
|
|
1142
|
-
this.buffer = firstPart;
|
|
1143
|
-
this.flushSegment();
|
|
1144
|
-
this.buffer = secondPart;
|
|
1145
|
-
} else {
|
|
1146
|
-
const midPoint = Math.floor(content.length / 2);
|
|
1147
|
-
const firstPart = content.substring(0, midPoint);
|
|
1148
|
-
const secondPart = content.substring(midPoint);
|
|
1149
|
-
this.buffer = firstPart;
|
|
1150
|
-
this.flushSegment();
|
|
1151
|
-
this.buffer = secondPart;
|
|
1152
|
-
}
|
|
1381
|
+
pause(id) {
|
|
1382
|
+
this.sessions.get(id)?.pause();
|
|
1153
1383
|
}
|
|
1154
1384
|
/**
|
|
1155
|
-
*
|
|
1156
|
-
* @param bufferToFlush - 要分段的缓冲区内容
|
|
1385
|
+
* 恢复会话
|
|
1157
1386
|
*/
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
|
|
1162
|
-
const isTooShort = content.length < 3;
|
|
1163
|
-
if (isPureSymbols && isTooShort) {
|
|
1164
|
-
return;
|
|
1165
|
-
}
|
|
1166
|
-
const formattedContent = import_volcano_sdk3.MarkdownFormatter.format(content).replace((0, import_emoji_regex3.default)(), "");
|
|
1167
|
-
if (!formattedContent) return;
|
|
1168
|
-
let subSegments = [formattedContent];
|
|
1169
|
-
if (formattedContent.length > this.maxLength) {
|
|
1170
|
-
subSegments = this.splitLongSegment(formattedContent);
|
|
1171
|
-
}
|
|
1172
|
-
for (const subSegment of subSegments) {
|
|
1173
|
-
if (!subSegment) continue;
|
|
1174
|
-
const segment = {
|
|
1175
|
-
index: this.segmentIndex++,
|
|
1176
|
-
content: subSegment,
|
|
1177
|
-
length: subSegment.length,
|
|
1178
|
-
sent: false
|
|
1179
|
-
};
|
|
1180
|
-
this.segments.push(segment);
|
|
1181
|
-
this.onSegmentComplete?.(segment);
|
|
1387
|
+
resume(id) {
|
|
1388
|
+
if (this.activeStreamId && this.activeStreamId !== id) {
|
|
1389
|
+
this.pause(this.activeStreamId);
|
|
1182
1390
|
}
|
|
1391
|
+
this.sessions.get(id)?.resume();
|
|
1392
|
+
this.activeStreamId = id;
|
|
1183
1393
|
}
|
|
1184
1394
|
/**
|
|
1185
|
-
*
|
|
1395
|
+
* 注册(兼容旧 API,但推荐直接用 createSession)
|
|
1396
|
+
* 为了兼容 useMessageTTS 旧逻辑,这里可以保留一些别名,但我们会重构 hook,所以可以改变 API。
|
|
1186
1397
|
*/
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1398
|
+
};
|
|
1399
|
+
var StreamPlaybackManager = new StreamPlaybackManagerImpl();
|
|
1400
|
+
|
|
1401
|
+
// src/tts/Metrics.ts
|
|
1402
|
+
var NoopMetricsCollector = class {
|
|
1403
|
+
record(_metric) {
|
|
1404
|
+
}
|
|
1405
|
+
};
|
|
1406
|
+
|
|
1407
|
+
// src/tts/useMessageTTS.ts
|
|
1408
|
+
function useMessageTTS({
|
|
1409
|
+
ttsConfig,
|
|
1410
|
+
audioParams,
|
|
1411
|
+
autoPlay = true,
|
|
1412
|
+
metricsCollector = new NoopMetricsCollector(),
|
|
1413
|
+
onPlayStart,
|
|
1414
|
+
onPlayPause,
|
|
1415
|
+
onPlayResume,
|
|
1416
|
+
onPlayEnd,
|
|
1417
|
+
onStop,
|
|
1418
|
+
onError,
|
|
1419
|
+
fallbackVoice,
|
|
1420
|
+
visualization,
|
|
1421
|
+
streamId: externalStreamId
|
|
1422
|
+
}) {
|
|
1423
|
+
const isSubscriptionMode = !!externalStreamId;
|
|
1424
|
+
const [internalStreamId, setInternalStreamId] = (0, import_react3.useState)("");
|
|
1425
|
+
const [isSwitchedToIndependent, setIsSwitchedToIndependent] = (0, import_react3.useState)(false);
|
|
1426
|
+
const streamId = isSwitchedToIndependent ? internalStreamId : externalStreamId || internalStreamId;
|
|
1427
|
+
const [state, setState] = (0, import_react3.useState)({
|
|
1428
|
+
isPlaying: false,
|
|
1429
|
+
isPaused: false,
|
|
1430
|
+
isSynthesizing: false,
|
|
1431
|
+
progress: 0,
|
|
1432
|
+
visualizationData: {
|
|
1433
|
+
frequencyData: new Uint8Array(0),
|
|
1434
|
+
timeDomainData: new Uint8Array(0)
|
|
1435
|
+
},
|
|
1436
|
+
error: null,
|
|
1437
|
+
isConnected: false,
|
|
1438
|
+
isSessionStarted: false,
|
|
1439
|
+
isStreamFinished: false
|
|
1440
|
+
});
|
|
1441
|
+
const [error, setErrorState] = (0, import_react3.useState)(null);
|
|
1442
|
+
const isFallbackRef = (0, import_react3.useRef)(false);
|
|
1443
|
+
const fallbackUtteranceRef = (0, import_react3.useRef)(null);
|
|
1444
|
+
const currentTextRef = (0, import_react3.useRef)("");
|
|
1445
|
+
(0, import_react3.useEffect)(() => {
|
|
1446
|
+
if (!streamId) return;
|
|
1447
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1448
|
+
if (session) {
|
|
1449
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1450
|
+
setState(newState);
|
|
1451
|
+
if (newState.error) setErrorState(newState.error);
|
|
1452
|
+
});
|
|
1453
|
+
return () => {
|
|
1454
|
+
unsubscribe();
|
|
1455
|
+
};
|
|
1192
1456
|
}
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
if (
|
|
1196
|
-
|
|
1197
|
-
|
|
1457
|
+
}, [streamId]);
|
|
1458
|
+
const stop = (0, import_react3.useCallback)(() => {
|
|
1459
|
+
if (streamId) {
|
|
1460
|
+
StreamPlaybackManager.stop(streamId);
|
|
1461
|
+
if (!isSubscriptionMode || isSwitchedToIndependent) {
|
|
1462
|
+
setInternalStreamId("");
|
|
1463
|
+
setIsSwitchedToIndependent(false);
|
|
1464
|
+
}
|
|
1198
1465
|
}
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
return;
|
|
1466
|
+
if (fallbackUtteranceRef.current) {
|
|
1467
|
+
window.speechSynthesis.cancel();
|
|
1468
|
+
fallbackUtteranceRef.current = null;
|
|
1203
1469
|
}
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1470
|
+
isFallbackRef.current = false;
|
|
1471
|
+
setState((prev) => ({
|
|
1472
|
+
...prev,
|
|
1473
|
+
isPlaying: false,
|
|
1474
|
+
isPaused: false,
|
|
1475
|
+
isSynthesizing: false,
|
|
1476
|
+
progress: 0
|
|
1477
|
+
}));
|
|
1478
|
+
onStop?.();
|
|
1479
|
+
}, [streamId, isSubscriptionMode, isSwitchedToIndependent, onStop]);
|
|
1480
|
+
const pause = (0, import_react3.useCallback)(() => {
|
|
1481
|
+
if (isFallbackRef.current) {
|
|
1482
|
+
window.speechSynthesis.pause();
|
|
1483
|
+
setState((prev) => ({ ...prev, isPaused: true, isPlaying: false }));
|
|
1484
|
+
onPlayPause?.();
|
|
1485
|
+
} else if (streamId) {
|
|
1486
|
+
StreamPlaybackManager.pause(streamId);
|
|
1207
1487
|
}
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1488
|
+
}, [streamId, onPlayPause]);
|
|
1489
|
+
const resume = (0, import_react3.useCallback)(() => {
|
|
1490
|
+
if (isFallbackRef.current) {
|
|
1491
|
+
window.speechSynthesis.resume();
|
|
1492
|
+
setState((prev) => ({ ...prev, isPaused: false, isPlaying: true }));
|
|
1493
|
+
onPlayResume?.();
|
|
1494
|
+
} else if (streamId) {
|
|
1495
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1496
|
+
if (session) {
|
|
1497
|
+
StreamPlaybackManager.resume(streamId);
|
|
1498
|
+
} else {
|
|
1499
|
+
console.log(
|
|
1500
|
+
"[useMessageTTS] Session not found, resetting pause state"
|
|
1501
|
+
);
|
|
1502
|
+
setState((prev) => ({ ...prev, isPaused: false, isPlaying: false }));
|
|
1503
|
+
}
|
|
1218
1504
|
}
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1505
|
+
}, [streamId, onPlayResume]);
|
|
1506
|
+
const togglePlay = (0, import_react3.useCallback)(() => {
|
|
1507
|
+
if (state.isPlaying) {
|
|
1508
|
+
pause();
|
|
1509
|
+
} else {
|
|
1510
|
+
resume();
|
|
1511
|
+
}
|
|
1512
|
+
}, [state.isPlaying, pause, resume]);
|
|
1513
|
+
const playFallback = (0, import_react3.useCallback)(
|
|
1514
|
+
(text) => {
|
|
1515
|
+
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
1516
|
+
stop();
|
|
1517
|
+
isFallbackRef.current = true;
|
|
1518
|
+
setErrorState(null);
|
|
1519
|
+
const utterance = new SpeechSynthesisUtterance(text);
|
|
1520
|
+
utterance.rate = audioParams?.speech_rate || 1;
|
|
1521
|
+
const voices = window.speechSynthesis.getVoices();
|
|
1522
|
+
const zhVoice = voices.find((v) => v.lang.includes("zh"));
|
|
1523
|
+
if (zhVoice) utterance.voice = zhVoice;
|
|
1524
|
+
utterance.onstart = () => {
|
|
1525
|
+
setState((prev) => ({ ...prev, isPlaying: true, isPaused: false }));
|
|
1526
|
+
onPlayStart?.();
|
|
1527
|
+
};
|
|
1528
|
+
utterance.onend = () => {
|
|
1529
|
+
setState((prev) => ({
|
|
1530
|
+
...prev,
|
|
1531
|
+
isPlaying: false,
|
|
1532
|
+
isPaused: false,
|
|
1533
|
+
progress: 100
|
|
1534
|
+
}));
|
|
1535
|
+
onPlayEnd?.();
|
|
1536
|
+
};
|
|
1537
|
+
utterance.onerror = (e) => {
|
|
1538
|
+
console.error("[useMessageTTS] Fallback TTS failed", e);
|
|
1539
|
+
setErrorState("Fallback TTS failed");
|
|
1540
|
+
onError?.(new Error("Fallback TTS failed"));
|
|
1541
|
+
};
|
|
1542
|
+
fallbackUtteranceRef.current = utterance;
|
|
1543
|
+
window.speechSynthesis.speak(utterance);
|
|
1544
|
+
},
|
|
1545
|
+
[audioParams, onError, onPlayEnd, onPlayStart, stop]
|
|
1546
|
+
);
|
|
1547
|
+
const handleError = (0, import_react3.useCallback)(
|
|
1548
|
+
(text, failedVoice) => {
|
|
1549
|
+
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
1550
|
+
console.warn(
|
|
1551
|
+
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
1552
|
+
);
|
|
1553
|
+
const newId = internalStreamId || `msg-tts-retry-${Date.now()}`;
|
|
1554
|
+
setInternalStreamId(newId);
|
|
1555
|
+
const session = StreamPlaybackManager.createSession(newId, {
|
|
1556
|
+
ttsConfig,
|
|
1557
|
+
audioParams: { ...audioParams, speaker: fallbackVoice },
|
|
1558
|
+
autoPlay,
|
|
1559
|
+
metricsCollector,
|
|
1560
|
+
visualization,
|
|
1561
|
+
onPlayStart,
|
|
1562
|
+
onPlayPause,
|
|
1563
|
+
onPlayResume,
|
|
1564
|
+
onPlayEnd,
|
|
1565
|
+
onError: () => playFallback(text)
|
|
1566
|
+
});
|
|
1567
|
+
session.play(text);
|
|
1568
|
+
} else {
|
|
1569
|
+
playFallback(text);
|
|
1570
|
+
}
|
|
1571
|
+
},
|
|
1572
|
+
[
|
|
1573
|
+
fallbackVoice,
|
|
1574
|
+
playFallback,
|
|
1575
|
+
ttsConfig,
|
|
1576
|
+
audioParams,
|
|
1577
|
+
autoPlay,
|
|
1578
|
+
metricsCollector,
|
|
1579
|
+
visualization,
|
|
1580
|
+
onPlayStart,
|
|
1581
|
+
onPlayPause,
|
|
1582
|
+
onPlayResume,
|
|
1583
|
+
onPlayEnd,
|
|
1584
|
+
internalStreamId
|
|
1585
|
+
]
|
|
1586
|
+
);
|
|
1587
|
+
const play = (0, import_react3.useCallback)(
|
|
1588
|
+
async (text) => {
|
|
1589
|
+
let shouldSwitchToIndependent = false;
|
|
1590
|
+
if (isSubscriptionMode) {
|
|
1591
|
+
const session2 = StreamPlaybackManager.getSession(externalStreamId || "");
|
|
1592
|
+
if (!session2) {
|
|
1593
|
+
console.log(
|
|
1594
|
+
"[useMessageTTS] Stream session not found, switching to independent play mode"
|
|
1595
|
+
);
|
|
1596
|
+
shouldSwitchToIndependent = true;
|
|
1597
|
+
setIsSwitchedToIndependent(true);
|
|
1598
|
+
} else if (session2.state.isStreamFinished) {
|
|
1599
|
+
console.log(
|
|
1600
|
+
"[useMessageTTS] Stream finished, switching to independent play mode"
|
|
1601
|
+
);
|
|
1602
|
+
shouldSwitchToIndependent = true;
|
|
1603
|
+
setIsSwitchedToIndependent(true);
|
|
1604
|
+
} else if (session2.state.isSynthesizing || session2.state.isPlaying) {
|
|
1605
|
+
console.warn(
|
|
1606
|
+
"[useMessageTTS] play() called in subscription mode while streaming, ignoring"
|
|
1607
|
+
);
|
|
1608
|
+
return;
|
|
1609
|
+
} else {
|
|
1610
|
+
console.log(
|
|
1611
|
+
"[useMessageTTS] Stream not active, switching to independent play mode"
|
|
1612
|
+
);
|
|
1613
|
+
shouldSwitchToIndependent = true;
|
|
1614
|
+
setIsSwitchedToIndependent(true);
|
|
1615
|
+
}
|
|
1238
1616
|
}
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1617
|
+
currentTextRef.current = text;
|
|
1618
|
+
stop();
|
|
1619
|
+
setErrorState(null);
|
|
1620
|
+
isFallbackRef.current = false;
|
|
1621
|
+
const id = `msg-tts-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1622
|
+
const session = StreamPlaybackManager.createSession(id, {
|
|
1623
|
+
ttsConfig,
|
|
1624
|
+
audioParams,
|
|
1625
|
+
autoPlay,
|
|
1626
|
+
metricsCollector,
|
|
1627
|
+
visualization,
|
|
1628
|
+
onPlayStart,
|
|
1629
|
+
onPlayPause,
|
|
1630
|
+
onPlayResume,
|
|
1631
|
+
onPlayEnd,
|
|
1632
|
+
onError: (err) => {
|
|
1633
|
+
handleError(text, audioParams?.speaker || "");
|
|
1634
|
+
}
|
|
1635
|
+
});
|
|
1636
|
+
setInternalStreamId(id);
|
|
1637
|
+
await session.play(text);
|
|
1638
|
+
},
|
|
1639
|
+
[
|
|
1640
|
+
isSubscriptionMode,
|
|
1641
|
+
externalStreamId,
|
|
1642
|
+
stop,
|
|
1643
|
+
ttsConfig,
|
|
1644
|
+
audioParams,
|
|
1645
|
+
autoPlay,
|
|
1646
|
+
metricsCollector,
|
|
1647
|
+
visualization,
|
|
1648
|
+
onPlayStart,
|
|
1649
|
+
onPlayPause,
|
|
1650
|
+
onPlayResume,
|
|
1651
|
+
onPlayEnd,
|
|
1652
|
+
handleError
|
|
1653
|
+
]
|
|
1654
|
+
);
|
|
1655
|
+
const seek = (0, import_react3.useCallback)(
|
|
1656
|
+
(percentage) => {
|
|
1657
|
+
if (streamId) {
|
|
1658
|
+
StreamPlaybackManager.getSession(streamId)?.seek(percentage);
|
|
1257
1659
|
}
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
|
|
1296
|
-
};
|
|
1297
|
-
}
|
|
1298
|
-
};
|
|
1660
|
+
},
|
|
1661
|
+
[streamId]
|
|
1662
|
+
);
|
|
1663
|
+
const getFrequencyData = (0, import_react3.useCallback)(
|
|
1664
|
+
() => state.visualizationData.frequencyData,
|
|
1665
|
+
[state.visualizationData]
|
|
1666
|
+
);
|
|
1667
|
+
const getTimeDomainData = (0, import_react3.useCallback)(
|
|
1668
|
+
() => state.visualizationData.timeDomainData,
|
|
1669
|
+
[state.visualizationData]
|
|
1670
|
+
);
|
|
1671
|
+
const isStreamActive = !!(externalStreamId && (state.isPlaying || state.isPaused || state.isSynthesizing));
|
|
1672
|
+
const canResume = (0, import_react3.useCallback)(() => {
|
|
1673
|
+
if (!streamId) return false;
|
|
1674
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1675
|
+
return !!session;
|
|
1676
|
+
}, [streamId]);
|
|
1677
|
+
return {
|
|
1678
|
+
isPlaying: state.isPlaying,
|
|
1679
|
+
isPaused: state.isPaused,
|
|
1680
|
+
isSynthesizing: state.isSynthesizing,
|
|
1681
|
+
progress: state.progress,
|
|
1682
|
+
error,
|
|
1683
|
+
play,
|
|
1684
|
+
pause,
|
|
1685
|
+
resume,
|
|
1686
|
+
stop,
|
|
1687
|
+
togglePlay,
|
|
1688
|
+
seek,
|
|
1689
|
+
getFrequencyData,
|
|
1690
|
+
getTimeDomainData,
|
|
1691
|
+
visualizationData: state.visualizationData,
|
|
1692
|
+
isStreamActive,
|
|
1693
|
+
streamState: state,
|
|
1694
|
+
canResume
|
|
1695
|
+
};
|
|
1696
|
+
}
|
|
1299
1697
|
|
|
1300
1698
|
// src/tts/useStreamTTS.ts
|
|
1301
|
-
var
|
|
1302
|
-
var activeInstances2 = /* @__PURE__ */ new Map();
|
|
1303
|
-
var sessionAudioCache = /* @__PURE__ */ new Map();
|
|
1304
|
-
function buildFullUrl3(url, params) {
|
|
1305
|
-
const arr = [];
|
|
1306
|
-
for (const key in params) {
|
|
1307
|
-
if (Object.prototype.hasOwnProperty.call(params, key)) {
|
|
1308
|
-
arr.push(`${key}=${encodeURIComponent(params[key])}`);
|
|
1309
|
-
}
|
|
1310
|
-
}
|
|
1311
|
-
return `${url}?${arr.join("&")}`;
|
|
1312
|
-
}
|
|
1699
|
+
var import_react4 = require("react");
|
|
1313
1700
|
function useStreamTTS({
|
|
1314
1701
|
ttsConfig,
|
|
1315
1702
|
audioParams,
|
|
@@ -1323,427 +1710,131 @@ function useStreamTTS({
|
|
|
1323
1710
|
visualization,
|
|
1324
1711
|
maxSegmentLength = 150
|
|
1325
1712
|
}) {
|
|
1326
|
-
const [
|
|
1327
|
-
const
|
|
1328
|
-
const [
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1713
|
+
const [streamId, setStreamId] = (0, import_react4.useState)("");
|
|
1714
|
+
const streamIdRef = (0, import_react4.useRef)("");
|
|
1715
|
+
const [state, setState] = (0, import_react4.useState)({
|
|
1716
|
+
isPlaying: false,
|
|
1717
|
+
isPaused: false,
|
|
1718
|
+
isSynthesizing: false,
|
|
1719
|
+
progress: 0,
|
|
1720
|
+
visualizationData: {
|
|
1721
|
+
frequencyData: new Uint8Array(0),
|
|
1722
|
+
timeDomainData: new Uint8Array(0)
|
|
1723
|
+
},
|
|
1724
|
+
error: null,
|
|
1725
|
+
isConnected: false,
|
|
1726
|
+
isSessionStarted: false,
|
|
1727
|
+
isStreamFinished: false
|
|
1337
1728
|
});
|
|
1338
|
-
const
|
|
1339
|
-
const clientRef = (0, import_react4.useRef)(null);
|
|
1340
|
-
const audioRef = (0, import_react4.useRef)(null);
|
|
1341
|
-
const audioContextRef = (0, import_react4.useRef)(null);
|
|
1342
|
-
const analyserRef = (0, import_react4.useRef)(null);
|
|
1343
|
-
const sourceRef = (0, import_react4.useRef)(null);
|
|
1344
|
-
const audioUrlRef = (0, import_react4.useRef)(null);
|
|
1729
|
+
const [streamText, setStreamText] = (0, import_react4.useState)("");
|
|
1345
1730
|
const streamTextRef = (0, import_react4.useRef)("");
|
|
1346
|
-
const isConnectedRef = (0, import_react4.useRef)(false);
|
|
1347
|
-
const isSessionStartedRef = (0, import_react4.useRef)(false);
|
|
1348
|
-
const calledSessionStartedRef = (0, import_react4.useRef)(false);
|
|
1349
|
-
const splitterRef = (0, import_react4.useRef)(null);
|
|
1350
|
-
const segmentQueueRef = (0, import_react4.useRef)([]);
|
|
1351
|
-
const isSendingRef = (0, import_react4.useRef)(false);
|
|
1352
|
-
const sessionAudioBuffersRef = (0, import_react4.useRef)([]);
|
|
1353
|
-
const isStreamFinishedRef = (0, import_react4.useRef)(false);
|
|
1354
|
-
const isSessionFinishedRef = (0, import_react4.useRef)(false);
|
|
1355
|
-
const resolveAllSegmentsSentRef = (0, import_react4.useRef)(null);
|
|
1356
|
-
const currentVoiceRef = (0, import_react4.useRef)("");
|
|
1357
|
-
const initAudioContext = (0, import_react4.useCallback)(() => {
|
|
1358
|
-
if (!audioRef.current) return;
|
|
1359
|
-
if (!audioContextRef.current) {
|
|
1360
|
-
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
1361
|
-
audioContextRef.current = new AudioContextClass();
|
|
1362
|
-
}
|
|
1363
|
-
if (audioContextRef.current.state === "suspended") {
|
|
1364
|
-
audioContextRef.current.resume();
|
|
1365
|
-
}
|
|
1366
|
-
if (!analyserRef.current) {
|
|
1367
|
-
analyserRef.current = audioContextRef.current.createAnalyser();
|
|
1368
|
-
analyserRef.current.fftSize = visualization?.fftSize || 256;
|
|
1369
|
-
}
|
|
1370
|
-
if (!sourceRef.current) {
|
|
1371
|
-
try {
|
|
1372
|
-
sourceRef.current = audioContextRef.current.createMediaElementSource(audioRef.current);
|
|
1373
|
-
sourceRef.current.connect(analyserRef.current);
|
|
1374
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
1375
|
-
} catch (e) {
|
|
1376
|
-
}
|
|
1377
|
-
}
|
|
1378
|
-
}, [visualization?.fftSize]);
|
|
1379
|
-
const cleanupAudio = (0, import_react4.useCallback)(() => {
|
|
1380
|
-
if (audioUrlRef.current) {
|
|
1381
|
-
URL.revokeObjectURL(audioUrlRef.current);
|
|
1382
|
-
audioUrlRef.current = null;
|
|
1383
|
-
}
|
|
1384
|
-
if (audioRef.current) {
|
|
1385
|
-
audioRef.current.onerror = null;
|
|
1386
|
-
audioRef.current.onended = null;
|
|
1387
|
-
audioRef.current.onpause = null;
|
|
1388
|
-
audioRef.current.onplay = null;
|
|
1389
|
-
audioRef.current.ontimeupdate = null;
|
|
1390
|
-
audioRef.current.pause();
|
|
1391
|
-
audioRef.current.src = "";
|
|
1392
|
-
audioRef.current = null;
|
|
1393
|
-
}
|
|
1394
|
-
if (sourceRef.current) {
|
|
1395
|
-
try {
|
|
1396
|
-
sourceRef.current.disconnect();
|
|
1397
|
-
} catch (e) {
|
|
1398
|
-
}
|
|
1399
|
-
sourceRef.current = null;
|
|
1400
|
-
}
|
|
1401
|
-
}, []);
|
|
1402
|
-
const stopOthers = (0, import_react4.useCallback)(() => {
|
|
1403
|
-
activeInstances2.forEach((instance, id) => {
|
|
1404
|
-
if (id !== instanceId) {
|
|
1405
|
-
instance.pause();
|
|
1406
|
-
}
|
|
1407
|
-
});
|
|
1408
|
-
}, [instanceId]);
|
|
1409
|
-
const pause = (0, import_react4.useCallback)(() => {
|
|
1410
|
-
if (audioRef.current) {
|
|
1411
|
-
audioRef.current.pause();
|
|
1412
|
-
}
|
|
1413
|
-
setIsPaused(true);
|
|
1414
|
-
setIsPlaying(false);
|
|
1415
|
-
onPlayPause?.();
|
|
1416
|
-
}, [onPlayPause]);
|
|
1417
|
-
const resume = (0, import_react4.useCallback)(() => {
|
|
1418
|
-
stopOthers();
|
|
1419
|
-
if (audioRef.current) {
|
|
1420
|
-
audioRef.current.play();
|
|
1421
|
-
}
|
|
1422
|
-
setIsPaused(false);
|
|
1423
|
-
setIsPlaying(true);
|
|
1424
|
-
onPlayResume?.();
|
|
1425
|
-
activeInstances2.set(instanceId, { pause });
|
|
1426
|
-
}, [stopOthers, instanceId, pause, onPlayResume]);
|
|
1427
|
-
const sendNextSegment = (0, import_react4.useCallback)(() => {
|
|
1428
|
-
if (!clientRef.current || !isSessionStartedRef.current || isSendingRef.current || isSessionFinishedRef.current) {
|
|
1429
|
-
return;
|
|
1430
|
-
}
|
|
1431
|
-
if (segmentQueueRef.current.length === 0) {
|
|
1432
|
-
if (isStreamFinishedRef.current && !isSessionFinishedRef.current) {
|
|
1433
|
-
console.log("[useStreamTTS] All segments sent, finishing session");
|
|
1434
|
-
isSessionFinishedRef.current = true;
|
|
1435
|
-
clientRef.current.finishSession();
|
|
1436
|
-
resolveAllSegmentsSentRef.current?.();
|
|
1437
|
-
}
|
|
1438
|
-
return;
|
|
1439
|
-
}
|
|
1440
|
-
isSendingRef.current = true;
|
|
1441
|
-
const segment = segmentQueueRef.current.shift();
|
|
1442
|
-
console.log(`[useStreamTTS] Sending segment ${segment.index}: ${segment.content.substring(0, 30)}...`);
|
|
1443
|
-
clientRef.current.sendText(segment.content);
|
|
1444
|
-
segment.sent = true;
|
|
1445
|
-
isSendingRef.current = false;
|
|
1446
|
-
setTimeout(() => sendNextSegment(), 0);
|
|
1447
|
-
}, []);
|
|
1448
|
-
const stop = (0, import_react4.useCallback)(() => {
|
|
1449
|
-
if (clientRef.current) {
|
|
1450
|
-
clientRef.current.close();
|
|
1451
|
-
clientRef.current = null;
|
|
1452
|
-
}
|
|
1453
|
-
cleanupAudio();
|
|
1454
|
-
setIsConnected(false);
|
|
1455
|
-
isConnectedRef.current = false;
|
|
1456
|
-
setIsSessionStarted(false);
|
|
1457
|
-
isSessionStartedRef.current = false;
|
|
1458
|
-
calledSessionStartedRef.current = false;
|
|
1459
|
-
setIsPlaying(false);
|
|
1460
|
-
setIsPaused(false);
|
|
1461
|
-
setIsSynthesizing(false);
|
|
1462
|
-
setProgress(0);
|
|
1463
|
-
activeInstances2.delete(instanceId);
|
|
1464
|
-
streamTextRef.current = "";
|
|
1465
|
-
setStreamText("");
|
|
1466
|
-
segmentQueueRef.current = [];
|
|
1467
|
-
isSendingRef.current = false;
|
|
1468
|
-
sessionAudioBuffersRef.current = [];
|
|
1469
|
-
isStreamFinishedRef.current = false;
|
|
1470
|
-
isSessionFinishedRef.current = false;
|
|
1471
|
-
splitterRef.current?.reset();
|
|
1472
|
-
}, [cleanupAudio, instanceId]);
|
|
1473
1731
|
const connect = (0, import_react4.useCallback)(async () => {
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
sessionAudioBuffersRef.current = [];
|
|
1478
|
-
isStreamFinishedRef.current = false;
|
|
1732
|
+
const newStreamId = `tts-stream-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1733
|
+
setStreamId(newStreamId);
|
|
1734
|
+
streamIdRef.current = newStreamId;
|
|
1479
1735
|
streamTextRef.current = "";
|
|
1480
1736
|
setStreamText("");
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
try {
|
|
1496
|
-
const audio = new Audio();
|
|
1497
|
-
audio.crossOrigin = "anonymous";
|
|
1498
|
-
audioRef.current = audio;
|
|
1499
|
-
audio.onplay = () => {
|
|
1500
|
-
setIsPlaying(true);
|
|
1501
|
-
setIsPaused(false);
|
|
1502
|
-
onPlayStart?.();
|
|
1503
|
-
initAudioContext();
|
|
1504
|
-
activeInstances2.set(instanceId, { pause });
|
|
1505
|
-
};
|
|
1506
|
-
audio.onended = () => {
|
|
1507
|
-
setIsPlaying(false);
|
|
1508
|
-
setIsPaused(false);
|
|
1509
|
-
onPlayEnd?.();
|
|
1510
|
-
activeInstances2.delete(instanceId);
|
|
1511
|
-
};
|
|
1512
|
-
audio.onerror = (e) => {
|
|
1513
|
-
console.error("[useStreamTTS] Audio playback error:", e, audio.error);
|
|
1514
|
-
setErrorState(audio.error?.message || "Audio playback error");
|
|
1515
|
-
onError?.(new Error(audio.error?.message || "Audio playback error"));
|
|
1516
|
-
};
|
|
1517
|
-
audio.ontimeupdate = () => {
|
|
1518
|
-
let duration = audio.duration;
|
|
1519
|
-
if (!isFinite(duration) && audio.buffered.length > 0) {
|
|
1520
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
1521
|
-
}
|
|
1522
|
-
if (isFinite(duration) && duration > 0) {
|
|
1523
|
-
setProgress(audio.currentTime / duration * 100);
|
|
1524
|
-
}
|
|
1525
|
-
};
|
|
1526
|
-
clientRef.current = (0, import_tts3.WebsocketMSE)({ autoStartSession: false });
|
|
1527
|
-
splitterRef.current = new StreamingTextSplitter({
|
|
1528
|
-
maxLength: maxSegmentLength,
|
|
1529
|
-
onSegmentComplete: (segment) => {
|
|
1530
|
-
segmentQueueRef.current.push(segment);
|
|
1531
|
-
console.log(`[useStreamTTS] Segment ${segment.index} queued (${segment.length} chars)`);
|
|
1532
|
-
if (isSessionStartedRef.current) {
|
|
1533
|
-
sendNextSegment();
|
|
1534
|
-
}
|
|
1535
|
-
},
|
|
1536
|
-
onAllComplete: () => {
|
|
1537
|
-
console.log(`[useStreamTTS] All segments completed, total: ${segmentQueueRef.current.length} in queue`);
|
|
1538
|
-
}
|
|
1539
|
-
});
|
|
1540
|
-
const url = clientRef.current.start({
|
|
1541
|
-
url: buildFullUrl3(WS_URL2, {
|
|
1542
|
-
api_access_key: `Jwt; ${ttsConfig.token}`,
|
|
1543
|
-
api_app_key: ttsConfig.appid,
|
|
1544
|
-
api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
|
|
1545
|
-
}),
|
|
1546
|
-
config: {
|
|
1547
|
-
user: {
|
|
1548
|
-
uid: `req-${Date.now()}`
|
|
1549
|
-
},
|
|
1550
|
-
namespace: ttsConfig.namespace || "BidirectionalTTS",
|
|
1551
|
-
req_params: {
|
|
1552
|
-
speaker: voice,
|
|
1553
|
-
audio_params: {
|
|
1554
|
-
sample_rate: audioParams?.sample_rate || 24e3,
|
|
1555
|
-
format: audioParams?.format || "mp3",
|
|
1556
|
-
speech_rate: audioParams?.speech_rate,
|
|
1557
|
-
pitch_rate: audioParams?.pitch_rate,
|
|
1558
|
-
loudness_rate: audioParams?.loudness_rate
|
|
1559
|
-
},
|
|
1560
|
-
additions: JSON.stringify({
|
|
1561
|
-
enable_language_detector: true,
|
|
1562
|
-
disable_markdown_filter: true,
|
|
1563
|
-
enable_latex_tn: true
|
|
1564
|
-
})
|
|
1565
|
-
}
|
|
1566
|
-
},
|
|
1567
|
-
// ===== 关键回调 =====
|
|
1568
|
-
onStart: () => {
|
|
1569
|
-
setIsConnected(true);
|
|
1570
|
-
isConnectedRef.current = true;
|
|
1571
|
-
console.log("[useStreamTTS] WebSocket connected, waiting for text...");
|
|
1572
|
-
},
|
|
1573
|
-
onSessionStarted: () => {
|
|
1574
|
-
setIsSessionStarted(true);
|
|
1575
|
-
isSessionStartedRef.current = true;
|
|
1576
|
-
console.log("[useStreamTTS] Session started, can send text now");
|
|
1577
|
-
if (segmentQueueRef.current.length > 0) {
|
|
1578
|
-
sendNextSegment();
|
|
1579
|
-
}
|
|
1580
|
-
},
|
|
1581
|
-
onMessage: (data) => {
|
|
1582
|
-
setIsSynthesizing(true);
|
|
1583
|
-
if (sessionAudioBuffersRef.current.length === 0) {
|
|
1584
|
-
metricsCollector.record({
|
|
1585
|
-
name: "tts_latency",
|
|
1586
|
-
labels: { stage: "first_packet", voice },
|
|
1587
|
-
value: Date.now() - startTime,
|
|
1588
|
-
timestamp: Date.now()
|
|
1589
|
-
});
|
|
1590
|
-
}
|
|
1591
|
-
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
1592
|
-
sessionAudioBuffersRef.current.push(buffer);
|
|
1593
|
-
},
|
|
1594
|
-
onSessionFinished: () => {
|
|
1595
|
-
setIsSynthesizing(false);
|
|
1596
|
-
setIsSessionStarted(false);
|
|
1597
|
-
isSessionStartedRef.current = false;
|
|
1598
|
-
calledSessionStartedRef.current = false;
|
|
1599
|
-
if (sessionAudioBuffersRef.current.length > 0 && streamTextRef.current) {
|
|
1600
|
-
const speed = audioParams?.speech_rate || 0;
|
|
1601
|
-
const cacheKey = TTSCache.generateKey(streamTextRef.current, voice, speed);
|
|
1602
|
-
TTSCache.set(cacheKey, [...sessionAudioBuffersRef.current]);
|
|
1603
|
-
sessionAudioCache.set(instanceId, {
|
|
1604
|
-
streamText: streamTextRef.current,
|
|
1605
|
-
audioBuffers: [...sessionAudioBuffersRef.current],
|
|
1606
|
-
timestamp: Date.now(),
|
|
1607
|
-
voice,
|
|
1608
|
-
speed
|
|
1609
|
-
});
|
|
1610
|
-
console.log(`[useStreamTTS] Session finished, cached ${sessionAudioBuffersRef.current.length} audio buffers`);
|
|
1611
|
-
}
|
|
1612
|
-
metricsCollector.record({
|
|
1613
|
-
name: "tts_synthesis_finished",
|
|
1614
|
-
labels: { voice, text_length: streamTextRef.current.length },
|
|
1615
|
-
value: Date.now() - startTime,
|
|
1616
|
-
timestamp: Date.now()
|
|
1617
|
-
});
|
|
1618
|
-
},
|
|
1619
|
-
onError: (err) => {
|
|
1620
|
-
console.error("[useStreamTTS] TTS error:", err);
|
|
1621
|
-
setErrorState(err.msg || "TTS error");
|
|
1622
|
-
onError?.(new Error(err.msg || "TTS error"));
|
|
1623
|
-
setIsSynthesizing(false);
|
|
1624
|
-
}
|
|
1625
|
-
});
|
|
1626
|
-
audioUrlRef.current = url;
|
|
1627
|
-
audio.src = url;
|
|
1628
|
-
if (autoPlay) {
|
|
1629
|
-
try {
|
|
1630
|
-
await audio.play();
|
|
1631
|
-
} catch (e) {
|
|
1632
|
-
console.warn("[useStreamTTS] Autoplay blocked:", e);
|
|
1633
|
-
}
|
|
1737
|
+
const session = StreamPlaybackManager.createSession(newStreamId, {
|
|
1738
|
+
ttsConfig,
|
|
1739
|
+
audioParams,
|
|
1740
|
+
autoPlay,
|
|
1741
|
+
metricsCollector,
|
|
1742
|
+
visualization,
|
|
1743
|
+
maxSegmentLength,
|
|
1744
|
+
onPlayStart,
|
|
1745
|
+
onPlayPause,
|
|
1746
|
+
onPlayResume,
|
|
1747
|
+
onPlayEnd,
|
|
1748
|
+
onError: (err) => {
|
|
1749
|
+
setState((prev) => ({ ...prev, error: err.message }));
|
|
1750
|
+
onError?.(err);
|
|
1634
1751
|
}
|
|
1635
|
-
}
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
onError?.(err instanceof Error ? err : new Error(String(err)));
|
|
1639
|
-
}
|
|
1752
|
+
});
|
|
1753
|
+
await session.connect();
|
|
1754
|
+
return newStreamId;
|
|
1640
1755
|
}, [
|
|
1641
1756
|
ttsConfig,
|
|
1642
1757
|
audioParams,
|
|
1643
1758
|
autoPlay,
|
|
1644
|
-
stop,
|
|
1645
|
-
instanceId,
|
|
1646
|
-
onPlayStart,
|
|
1647
|
-
onPlayEnd,
|
|
1648
|
-
initAudioContext,
|
|
1649
|
-
pause,
|
|
1650
1759
|
metricsCollector,
|
|
1760
|
+
visualization,
|
|
1651
1761
|
maxSegmentLength,
|
|
1652
|
-
|
|
1762
|
+
onPlayStart,
|
|
1763
|
+
onPlayPause,
|
|
1764
|
+
onPlayResume,
|
|
1765
|
+
onPlayEnd,
|
|
1653
1766
|
onError
|
|
1654
1767
|
]);
|
|
1768
|
+
(0, import_react4.useEffect)(() => {
|
|
1769
|
+
if (!streamId) return;
|
|
1770
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1771
|
+
if (!session) return;
|
|
1772
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1773
|
+
setState(newState);
|
|
1774
|
+
});
|
|
1775
|
+
return () => {
|
|
1776
|
+
unsubscribe();
|
|
1777
|
+
};
|
|
1778
|
+
}, [streamId]);
|
|
1655
1779
|
const onMessage = (0, import_react4.useCallback)((chunk) => {
|
|
1656
|
-
if (!
|
|
1780
|
+
if (!streamIdRef.current) return;
|
|
1657
1781
|
streamTextRef.current += chunk;
|
|
1658
1782
|
setStreamText(streamTextRef.current);
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
calledSessionStartedRef.current = true;
|
|
1662
|
-
clientRef.current.startSession();
|
|
1663
|
-
}
|
|
1664
|
-
splitterRef.current?.onChunk(chunk);
|
|
1783
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1784
|
+
session?.handleStreamChunk(chunk);
|
|
1665
1785
|
}, []);
|
|
1666
1786
|
const finishStream = (0, import_react4.useCallback)(async () => {
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
if (segmentQueueRef.current.length > 0 || isSendingRef.current) {
|
|
1671
|
-
await new Promise((resolve) => {
|
|
1672
|
-
resolveAllSegmentsSentRef.current = resolve;
|
|
1673
|
-
});
|
|
1674
|
-
} else if (clientRef.current && isSessionStartedRef.current && !isSessionFinishedRef.current) {
|
|
1675
|
-
isSessionFinishedRef.current = true;
|
|
1676
|
-
clientRef.current.finishSession();
|
|
1677
|
-
}
|
|
1787
|
+
if (!streamIdRef.current) return;
|
|
1788
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1789
|
+
await session?.finishStream();
|
|
1678
1790
|
}, []);
|
|
1679
|
-
const
|
|
1680
|
-
if (
|
|
1681
|
-
|
|
1682
|
-
if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
|
|
1683
|
-
duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
|
|
1684
|
-
}
|
|
1685
|
-
if (isFinite(duration) && duration > 0) {
|
|
1686
|
-
const time = percentage / 100 * duration;
|
|
1687
|
-
if (isFinite(time)) {
|
|
1688
|
-
audioRef.current.currentTime = time;
|
|
1689
|
-
setProgress(percentage);
|
|
1690
|
-
}
|
|
1691
|
-
}
|
|
1791
|
+
const pause = (0, import_react4.useCallback)(() => {
|
|
1792
|
+
if (streamIdRef.current) {
|
|
1793
|
+
StreamPlaybackManager.pause(streamIdRef.current);
|
|
1692
1794
|
}
|
|
1693
1795
|
}, []);
|
|
1694
|
-
const
|
|
1695
|
-
if (
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
return dataArray;
|
|
1796
|
+
const resume = (0, import_react4.useCallback)(() => {
|
|
1797
|
+
if (streamIdRef.current) {
|
|
1798
|
+
StreamPlaybackManager.resume(streamIdRef.current);
|
|
1799
|
+
}
|
|
1699
1800
|
}, []);
|
|
1700
|
-
const
|
|
1701
|
-
if (
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1801
|
+
const stop = (0, import_react4.useCallback)(() => {
|
|
1802
|
+
if (streamIdRef.current) {
|
|
1803
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1804
|
+
setStreamId("");
|
|
1805
|
+
streamIdRef.current = "";
|
|
1806
|
+
}
|
|
1705
1807
|
}, []);
|
|
1706
|
-
(0, import_react4.
|
|
1707
|
-
if (
|
|
1708
|
-
|
|
1709
|
-
let lastUpdate = 0;
|
|
1710
|
-
const interval = visualization.refreshInterval || 0;
|
|
1711
|
-
const update = (timestamp) => {
|
|
1712
|
-
if (isPlaying && !isPaused) {
|
|
1713
|
-
if (timestamp - lastUpdate >= interval) {
|
|
1714
|
-
setVisualizationData({
|
|
1715
|
-
frequencyData: getFrequencyData(),
|
|
1716
|
-
timeDomainData: getTimeDomainData()
|
|
1717
|
-
});
|
|
1718
|
-
lastUpdate = timestamp;
|
|
1719
|
-
}
|
|
1720
|
-
animId = requestAnimationFrame(update);
|
|
1721
|
-
}
|
|
1722
|
-
};
|
|
1723
|
-
if (isPlaying && !isPaused) {
|
|
1724
|
-
animId = requestAnimationFrame(update);
|
|
1808
|
+
const seek = (0, import_react4.useCallback)((percentage) => {
|
|
1809
|
+
if (streamIdRef.current) {
|
|
1810
|
+
StreamPlaybackManager.getSession(streamIdRef.current)?.seek(percentage);
|
|
1725
1811
|
}
|
|
1726
|
-
|
|
1727
|
-
if (animId) cancelAnimationFrame(animId);
|
|
1728
|
-
};
|
|
1729
|
-
}, [isPlaying, isPaused, visualization, getFrequencyData, getTimeDomainData]);
|
|
1812
|
+
}, []);
|
|
1730
1813
|
(0, import_react4.useEffect)(() => {
|
|
1731
1814
|
return () => {
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
audioContextRef.current.close();
|
|
1815
|
+
if (streamIdRef.current) {
|
|
1816
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1735
1817
|
}
|
|
1736
1818
|
};
|
|
1737
|
-
}, [
|
|
1819
|
+
}, []);
|
|
1820
|
+
const getFrequencyData = (0, import_react4.useCallback)(
|
|
1821
|
+
() => state.visualizationData.frequencyData,
|
|
1822
|
+
[state.visualizationData]
|
|
1823
|
+
);
|
|
1824
|
+
const getTimeDomainData = (0, import_react4.useCallback)(
|
|
1825
|
+
() => state.visualizationData.timeDomainData,
|
|
1826
|
+
[state.visualizationData]
|
|
1827
|
+
);
|
|
1738
1828
|
return {
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1829
|
+
streamId,
|
|
1830
|
+
isConnected: state.isConnected,
|
|
1831
|
+
isSessionStarted: state.isSessionStarted,
|
|
1832
|
+
isSynthesizing: state.isSynthesizing,
|
|
1833
|
+
isPlaying: state.isPlaying,
|
|
1834
|
+
isPaused: state.isPaused,
|
|
1835
|
+
error: state.error,
|
|
1745
1836
|
streamText,
|
|
1746
|
-
progress,
|
|
1837
|
+
progress: state.progress,
|
|
1747
1838
|
connect,
|
|
1748
1839
|
onMessage,
|
|
1749
1840
|
finishStream,
|
|
@@ -1753,23 +1844,9 @@ function useStreamTTS({
|
|
|
1753
1844
|
seek,
|
|
1754
1845
|
getFrequencyData,
|
|
1755
1846
|
getTimeDomainData,
|
|
1756
|
-
visualizationData
|
|
1847
|
+
visualizationData: state.visualizationData
|
|
1757
1848
|
};
|
|
1758
1849
|
}
|
|
1759
|
-
function getSessionAudioCache(instanceId) {
|
|
1760
|
-
return sessionAudioCache.get(instanceId);
|
|
1761
|
-
}
|
|
1762
|
-
function clearSessionAudioCache(instanceId) {
|
|
1763
|
-
sessionAudioCache.delete(instanceId);
|
|
1764
|
-
}
|
|
1765
|
-
function findSessionCacheByText(streamText, voice, speed) {
|
|
1766
|
-
for (const entry of sessionAudioCache.values()) {
|
|
1767
|
-
if (entry.streamText === streamText && entry.voice === voice && entry.speed === speed) {
|
|
1768
|
-
return entry;
|
|
1769
|
-
}
|
|
1770
|
-
}
|
|
1771
|
-
return void 0;
|
|
1772
|
-
}
|
|
1773
1850
|
|
|
1774
1851
|
// src/components/AudioWaveVisualizer.tsx
|
|
1775
1852
|
var import_react5 = require("react");
|
|
@@ -2145,10 +2222,8 @@ var AudioProgressBar_default = AudioProgressBar;
|
|
|
2145
2222
|
0 && (module.exports = {
|
|
2146
2223
|
AudioProgressBar,
|
|
2147
2224
|
AudioWaveVisualizer,
|
|
2225
|
+
StreamPlaybackManager,
|
|
2148
2226
|
StreamingTextSplitter,
|
|
2149
|
-
clearSessionAudioCache,
|
|
2150
|
-
findSessionCacheByText,
|
|
2151
|
-
getSessionAudioCache,
|
|
2152
2227
|
splitTextByDelimiters,
|
|
2153
2228
|
useMessageTTS,
|
|
2154
2229
|
useStreamTTS,
|