@wq-hook/volcano-react 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +129 -40
- package/dist/index.d.ts +129 -40
- package/dist/index.js +1080 -1113
- package/dist/index.mjs +1079 -1110
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -32,10 +32,8 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
AudioProgressBar: () => AudioProgressBar_default,
|
|
34
34
|
AudioWaveVisualizer: () => AudioWaveVisualizer_default,
|
|
35
|
+
StreamPlaybackManager: () => StreamPlaybackManager,
|
|
35
36
|
StreamingTextSplitter: () => StreamingTextSplitter,
|
|
36
|
-
clearSessionAudioCache: () => clearSessionAudioCache,
|
|
37
|
-
findSessionCacheByText: () => findSessionCacheByText,
|
|
38
|
-
getSessionAudioCache: () => getSessionAudioCache,
|
|
39
37
|
splitTextByDelimiters: () => splitTextByDelimiters,
|
|
40
38
|
useMessageTTS: () => useMessageTTS,
|
|
41
39
|
useStreamTTS: () => useStreamTTS,
|
|
@@ -442,10 +440,253 @@ function useVolcanoTTS({
|
|
|
442
440
|
}
|
|
443
441
|
|
|
444
442
|
// src/tts/useMessageTTS.ts
|
|
443
|
+
var import_react3 = require("react");
|
|
444
|
+
|
|
445
|
+
// src/tts/StreamPlaybackManager.ts
|
|
445
446
|
var import_tts2 = require("@wq-hook/volcano-sdk/tts");
|
|
447
|
+
|
|
448
|
+
// src/tts/StreamingTextSplitter.ts
|
|
446
449
|
var import_volcano_sdk2 = require("@wq-hook/volcano-sdk");
|
|
447
|
-
var import_react3 = require("react");
|
|
448
450
|
var import_emoji_regex2 = __toESM(require("emoji-regex"));
|
|
451
|
+
var StreamingTextSplitter = class {
  /**
   * Incrementally splits streamed text into segments.
   *
   * Text is buffered as chunks arrive. A segment is cut at every newline, and
   * the buffer is force-split once it reaches `maxLength`. Each segment is
   * passed through `MarkdownFormatter.format` and has emoji removed before it
   * is emitted.
   *
   * @param {object} [options]
   * @param {number} [options.maxLength=150] Buffer/segment length (UTF-16 units) that triggers a forced split.
   * @param {number} [options.minLength=10] A sentence ender is only used as a forced split point when its index is greater than this.
   * @param {(segment: object) => void} [options.onSegmentComplete] Called once per emitted segment.
   * @param {(segments: object[]) => void} [options.onAllComplete] Called by complete() with every emitted segment.
   */
  constructor(options = {}) {
    /** Text received so far that has not yet been emitted as a segment. */
    this.buffer = "";
    /** Index assigned to the next emitted segment. */
    this.segmentIndex = 0;
    /** Segments emitted since construction or the last reset(). */
    this.segments = [];
    /** Set by complete(); chunks are ignored while it is true. */
    this.isCompleted = false;
    this.maxLength = options.maxLength || 150;
    this.minLength = options.minLength || 10;
    this.onSegmentComplete = options.onSegmentComplete;
    this.onAllComplete = options.onAllComplete;
  }

  /**
   * Receives one chunk of streamed text.
   *
   * @param {string} chunk - Text chunk; empty chunks and chunks arriving after complete() are ignored.
   */
  onChunk(chunk) {
    if (!chunk || this.isCompleted) return;
    this.buffer += chunk;
    if (this.detectBoundary(chunk)) {
      // Flush every complete line right away. A leading newline must not stop
      // the drain, otherwise the lines behind it stay buffered until the next
      // newline-bearing chunk or complete().
      this.drainCompleteLines();
    }
  }

  /**
   * Decides whether the latest chunk produced a segment boundary.
   *
   * Not a pure predicate: when the buffer has reached `maxLength` it also
   * force-splits the buffer before returning.
   *
   * @param {string} chunk - Most recently received chunk.
   * @returns {boolean} True when the chunk contains a newline or the buffer reached `maxLength`.
   */
  detectBoundary(chunk) {
    const hasNewline = chunk.includes("\n");
    if (this.buffer.length >= this.maxLength) {
      this.forceSplitAtSentenceBoundary();
      return true;
    }
    return hasNewline;
  }

  /**
   * Force-splits an over-long buffer.
   *
   * Cuts right after the last sentence ender when that ender sits past
   * `minLength`; otherwise cuts at the midpoint. The first part is emitted
   * through flushSegment() and the remainder stays buffered.
   */
  forceSplitAtSentenceBoundary() {
    const content = this.buffer;
    // 。 plus ? and ! in both ASCII and full-width forms.
    const sentenceEnders = "\u3002?!\uFF1F\uFF01";
    let lastEnderIndex = -1;
    for (let i = content.length - 1; i >= 0; i--) {
      if (sentenceEnders.includes(content[i])) {
        lastEnderIndex = i;
        break;
      }
    }

    let splitPoint;
    if (lastEnderIndex !== -1 && lastEnderIndex > this.minLength) {
      splitPoint = lastEnderIndex + 1;
    } else {
      splitPoint = Math.floor(content.length / 2);
      // Never cut between the two halves of a surrogate pair: that would
      // leave a lone surrogate at the end of one segment and the start of the next.
      const before = content.charCodeAt(splitPoint - 1);
      const after = content.charCodeAt(splitPoint);
      const splitsSurrogatePair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsSurrogatePair) {
        splitPoint += 1;
      }
    }

    // flushSegment() works on this.buffer, so stage the first part there and
    // restore the remainder afterwards.
    this.buffer = content.substring(0, splitPoint);
    this.flushSegment();
    this.buffer = content.substring(splitPoint);
  }

  /**
   * Emits the given text as one or more segments without touching the buffer.
   *
   * @param {string} bufferToFlush - Text to turn into segments (not trimmed).
   */
  flushSegmentWithBuffer(bufferToFlush) {
    this.emitSegments(bufferToFlush);
  }

  /**
   * Emits the trimmed buffer as one or more segments, then clears the buffer.
   */
  flushSegment() {
    this.emitSegments(this.buffer.trim());
    this.buffer = "";
  }

  /**
   * Shared emission pipeline behind flushSegment() and flushSegmentWithBuffer():
   * filter, format, strip emoji, split when too long, then record and report
   * each resulting segment.
   *
   * @param {string} content - Raw text to emit.
   */
  emitSegments(content) {
    if (!content) return;
    // Drop fragments shorter than 3 chars that contain no letter or digit.
    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
    const isTooShort = content.length < 3;
    if (isPureSymbols && isTooShort) return;

    const formattedContent = import_volcano_sdk2.MarkdownFormatter.format(content).replace((0, import_emoji_regex2.default)(), "");
    // Formatting/emoji removal can leave nothing but whitespace (e.g. a line
    // of emoji separated by spaces); there is nothing to speak in that case.
    if (!formattedContent || !formattedContent.trim()) return;

    const subSegments = formattedContent.length > this.maxLength
      ? this.splitLongSegment(formattedContent)
      : [formattedContent];

    for (const subSegment of subSegments) {
      if (!subSegment || !subSegment.trim()) continue;
      const segment = {
        index: this.segmentIndex++,
        content: subSegment,
        length: subSegment.length,
        sent: false
      };
      this.segments.push(segment);
      this.onSegmentComplete?.(segment);
    }
  }

  /**
   * Moves every complete (newline-terminated) line out of the buffer and
   * emits it. Empty lines are dropped; the text after the last newline stays
   * buffered.
   */
  drainCompleteLines() {
    let newlineIndex = this.buffer.indexOf("\n");
    while (newlineIndex !== -1) {
      const line = this.buffer.substring(0, newlineIndex);
      this.buffer = this.buffer.substring(newlineIndex + 1);
      this.flushSegmentWithBuffer(line);
      newlineIndex = this.buffer.indexOf("\n");
    }
  }

  /**
   * Splits an over-long segment into pieces.
   *
   * A piece ends after every punctuation mark (while it is still within
   * `maxLength`) or as soon as it reaches `maxLength`.
   *
   * @param {string} segment - Over-long segment text.
   * @returns {string[]} Non-empty pieces in original order.
   */
  splitLongSegment(segment) {
    // 。 plus ? ! , in both ASCII and full-width forms.
    const punctuation = /[\u3002?!,\uFF1F\uFF01\uFF0C]/;
    const result = [];
    let current = "";
    // for...of walks code points, so a piece never ends inside a surrogate pair.
    for (const char of segment) {
      current += char;
      const atPunctuation = punctuation.test(char);
      if ((atPunctuation && current.length <= this.maxLength) || current.length >= this.maxLength) {
        result.push(current);
        current = "";
      }
    }
    if (current) {
      result.push(current);
    }
    return result;
  }

  /**
   * Marks the stream as finished: emits whatever is still buffered and then
   * reports all segments through `onAllComplete`. Later calls are no-ops
   * until reset().
   */
  complete() {
    if (this.isCompleted) return;
    this.isCompleted = true;
    this.drainCompleteLines();
    // Handles an empty/whitespace-only remainder by simply clearing the buffer.
    this.flushSegment();
    this.onAllComplete?.(this.segments);
  }

  /**
   * Resets the splitter to its initial state (options and callbacks are kept).
   */
  reset() {
    this.buffer = "";
    this.segmentIndex = 0;
    this.segments = [];
    this.isCompleted = false;
  }

  /**
   * @returns {string} The text currently buffered.
   */
  getBuffer() {
    return this.buffer;
  }

  /**
   * @returns {object[]} The internal list of emitted segments (not a copy).
   */
  getSegments() {
    return this.segments;
  }

  /**
   * @returns {{bufferLength: number, segmentCount: number, totalChars: number}}
   *   Buffered length, number of emitted segments and the sum of their lengths.
   */
  getStats() {
    return {
      bufferLength: this.buffer.length,
      segmentCount: this.segments.length,
      totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
    };
  }
};
|
|
686
|
+
|
|
687
|
+
// src/tts/StreamPlaybackManager.ts
|
|
688
|
+
var import_emoji_regex3 = __toESM(require("emoji-regex"));
|
|
689
|
+
var import_volcano_sdk3 = require("@wq-hook/volcano-sdk");
|
|
449
690
|
|
|
450
691
|
// src/tts/TextSplitter.ts
|
|
451
692
|
function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
@@ -515,338 +756,178 @@ function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
|
515
756
|
return segments;
|
|
516
757
|
}
|
|
517
758
|
|
|
518
|
-
// src/tts/
|
|
519
|
-
var NoopMetricsCollector = class {
|
|
520
|
-
record(_metric) {
|
|
521
|
-
}
|
|
522
|
-
};
|
|
523
|
-
|
|
524
|
-
// src/tts/useMessageTTS.ts
|
|
759
|
+
// src/tts/StreamPlaybackManager.ts
|
|
525
760
|
var WS_URL = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
|
|
526
|
-
var activeInstances = /* @__PURE__ */ new Map();
|
|
527
761
|
/**
 * Appends `params` to `url` as a query string.
 *
 * Both keys and values are percent-encoded with `encodeURIComponent`, so a
 * key containing `&`, `=` or `#` cannot corrupt the query. Only own
 * enumerable properties of `params` are used, in insertion order.
 *
 * @param {string} url - Base URL; a `?` is always appended after it.
 * @param {object} [params] - Query parameters; a nullish value yields an empty query.
 * @returns {string} `url` followed by `?` and the `&`-joined `key=value` pairs.
 */
function buildFullUrl2(url, params) {
  const pairs = Object.entries(params ?? {}).map(
    ([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`
  );
  return `${url}?${pairs.join("&")}`;
}
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
const initAudioContext = (0, import_react3.useCallback)(() => {
|
|
586
|
-
if (!audioRef.current) return;
|
|
587
|
-
if (!audioContextRef.current) {
|
|
772
|
+
var PlaybackSession = class {
|
|
773
|
+
constructor(id, config) {
|
|
774
|
+
this.listeners = /* @__PURE__ */ new Set();
|
|
775
|
+
this.audioContext = null;
|
|
776
|
+
this.analyser = null;
|
|
777
|
+
this.source = null;
|
|
778
|
+
this.audioUrl = null;
|
|
779
|
+
// TTS Resources
|
|
780
|
+
this.client = null;
|
|
781
|
+
this.splitter = null;
|
|
782
|
+
// Internal State
|
|
783
|
+
this.segmentQueue = [];
|
|
784
|
+
this.isSending = false;
|
|
785
|
+
this.isSessionStarting = false;
|
|
786
|
+
this.streamText = "";
|
|
787
|
+
this.sessionAudioBuffers = [];
|
|
788
|
+
this.isStreamFinished = false;
|
|
789
|
+
this.isSessionFinished = false;
|
|
790
|
+
this.resolveAllSegmentsSent = null;
|
|
791
|
+
this.animId = null;
|
|
792
|
+
this.lastVisUpdate = 0;
|
|
793
|
+
this.id = id;
|
|
794
|
+
this.config = config;
|
|
795
|
+
this.state = {
|
|
796
|
+
isPlaying: false,
|
|
797
|
+
isPaused: false,
|
|
798
|
+
isSynthesizing: false,
|
|
799
|
+
progress: 0,
|
|
800
|
+
visualizationData: {
|
|
801
|
+
frequencyData: new Uint8Array(0),
|
|
802
|
+
timeDomainData: new Uint8Array(0)
|
|
803
|
+
},
|
|
804
|
+
error: null,
|
|
805
|
+
isConnected: false,
|
|
806
|
+
isSessionStarted: false,
|
|
807
|
+
isStreamFinished: false
|
|
808
|
+
};
|
|
809
|
+
this.audio = new Audio();
|
|
810
|
+
this.audio.crossOrigin = "anonymous";
|
|
811
|
+
this.setupAudioListeners();
|
|
812
|
+
}
|
|
813
|
+
/**
|
|
814
|
+
* 初始化 AudioContext(用于可视化)
|
|
815
|
+
*/
|
|
816
|
+
initAudioContext() {
|
|
817
|
+
if (!this.audioContext) {
|
|
588
818
|
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
589
|
-
|
|
819
|
+
this.audioContext = new AudioContextClass();
|
|
590
820
|
}
|
|
591
|
-
if (
|
|
592
|
-
|
|
821
|
+
if (this.audioContext.state === "suspended") {
|
|
822
|
+
this.audioContext.resume();
|
|
593
823
|
}
|
|
594
|
-
if (!
|
|
595
|
-
|
|
596
|
-
|
|
824
|
+
if (!this.analyser && this.audioContext) {
|
|
825
|
+
this.analyser = this.audioContext.createAnalyser();
|
|
826
|
+
this.analyser.fftSize = this.config.visualization?.fftSize || 256;
|
|
597
827
|
}
|
|
598
|
-
if (!
|
|
828
|
+
if (!this.source && this.audioContext && this.analyser) {
|
|
599
829
|
try {
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
);
|
|
603
|
-
sourceRef.current.connect(analyserRef.current);
|
|
604
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
830
|
+
this.source = this.audioContext.createMediaElementSource(this.audio);
|
|
831
|
+
this.source.connect(this.analyser);
|
|
832
|
+
this.analyser.connect(this.audioContext.destination);
|
|
605
833
|
} catch (e) {
|
|
606
834
|
}
|
|
607
835
|
}
|
|
608
|
-
}
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
836
|
+
}
|
|
837
|
+
setupAudioListeners() {
|
|
838
|
+
this.audio.onplay = () => {
|
|
839
|
+
this.updateState({ isPlaying: true, isPaused: false });
|
|
840
|
+
this.config.onPlayStart?.();
|
|
841
|
+
this.initAudioContext();
|
|
842
|
+
this.startVisualizationLoop();
|
|
843
|
+
};
|
|
844
|
+
this.audio.onpause = () => {
|
|
845
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
846
|
+
this.config.onPlayPause?.();
|
|
847
|
+
};
|
|
848
|
+
this.audio.onended = () => {
|
|
849
|
+
this.updateState({
|
|
850
|
+
isPlaying: false,
|
|
851
|
+
isPaused: false,
|
|
852
|
+
isSynthesizing: false,
|
|
853
|
+
progress: 100
|
|
854
|
+
});
|
|
855
|
+
this.config.onPlayEnd?.();
|
|
856
|
+
this.stopVisualizationLoop();
|
|
857
|
+
};
|
|
858
|
+
this.audio.onerror = (e) => {
|
|
859
|
+
const msg = this.audio.error?.message || "Audio playback error";
|
|
860
|
+
console.error("[PlaybackSession] Audio error:", msg);
|
|
861
|
+
this.updateState({ error: msg });
|
|
862
|
+
this.config.onError?.(new Error(msg));
|
|
863
|
+
};
|
|
864
|
+
this.audio.ontimeupdate = () => {
|
|
865
|
+
let duration = this.audio.duration;
|
|
866
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
867
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
628
868
|
}
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
window.speechSynthesis.cancel();
|
|
633
|
-
fallbackUtteranceRef.current = null;
|
|
634
|
-
}
|
|
635
|
-
isFallbackRef.current = false;
|
|
636
|
-
}, []);
|
|
637
|
-
const stop = (0, import_react3.useCallback)(() => {
|
|
638
|
-
if (clientRef.current) {
|
|
639
|
-
clientRef.current.close();
|
|
640
|
-
clientRef.current = null;
|
|
641
|
-
}
|
|
642
|
-
cleanupAudio();
|
|
643
|
-
setIsPlaying(false);
|
|
644
|
-
setIsPaused(false);
|
|
645
|
-
setIsSynthesizing(false);
|
|
646
|
-
setProgress(0);
|
|
647
|
-
activeInstances.delete(instanceId);
|
|
648
|
-
}, [cleanupAudio, instanceId]);
|
|
649
|
-
const pause = (0, import_react3.useCallback)(() => {
|
|
650
|
-
if (isFallbackRef.current) {
|
|
651
|
-
window.speechSynthesis.pause();
|
|
652
|
-
} else if (audioRef.current) {
|
|
653
|
-
audioRef.current.pause();
|
|
654
|
-
}
|
|
655
|
-
setIsPaused(true);
|
|
656
|
-
setIsPlaying(false);
|
|
657
|
-
onPlayPause?.();
|
|
658
|
-
}, [onPlayPause]);
|
|
659
|
-
const resume = (0, import_react3.useCallback)(() => {
|
|
660
|
-
stopOthers();
|
|
661
|
-
if (isFallbackRef.current) {
|
|
662
|
-
window.speechSynthesis.resume();
|
|
663
|
-
} else if (audioRef.current) {
|
|
664
|
-
audioRef.current.play();
|
|
665
|
-
}
|
|
666
|
-
setIsPaused(false);
|
|
667
|
-
setIsPlaying(true);
|
|
668
|
-
onPlayResume?.();
|
|
669
|
-
activeInstances.set(instanceId, { pause });
|
|
670
|
-
}, [stopOthers, instanceId, pause, onPlayResume]);
|
|
671
|
-
const togglePlay = (0, import_react3.useCallback)(() => {
|
|
672
|
-
if (isPlaying) {
|
|
673
|
-
pause();
|
|
674
|
-
} else {
|
|
675
|
-
resume();
|
|
676
|
-
}
|
|
677
|
-
}, [isPlaying, pause, resume]);
|
|
678
|
-
const playFallback = (0, import_react3.useCallback)(
|
|
679
|
-
(text) => {
|
|
680
|
-
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
681
|
-
isFallbackRef.current = true;
|
|
682
|
-
if (clientRef.current) {
|
|
683
|
-
clientRef.current.close();
|
|
684
|
-
clientRef.current = null;
|
|
869
|
+
if (isFinite(duration) && duration > 0) {
|
|
870
|
+
const progress = this.audio.currentTime / duration * 100;
|
|
871
|
+
this.updateState({ progress });
|
|
685
872
|
}
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
873
|
+
};
|
|
874
|
+
}
|
|
875
|
+
/**
|
|
876
|
+
* 建立 WebSocket 连接
|
|
877
|
+
*/
|
|
878
|
+
async connect() {
|
|
879
|
+
if (this.state.isConnected) return;
|
|
880
|
+
this.updateState({
|
|
881
|
+
error: null,
|
|
882
|
+
progress: 0,
|
|
883
|
+
isSynthesizing: false,
|
|
884
|
+
isConnected: false,
|
|
885
|
+
isSessionStarted: false
|
|
886
|
+
});
|
|
887
|
+
this.streamText = "";
|
|
888
|
+
this.segmentQueue = [];
|
|
889
|
+
this.sessionAudioBuffers = [];
|
|
890
|
+
this.isStreamFinished = false;
|
|
891
|
+
this.isSessionFinished = false;
|
|
892
|
+
this.isSessionStarting = false;
|
|
893
|
+
if (this.client) {
|
|
894
|
+
this.client.close();
|
|
895
|
+
this.client = null;
|
|
896
|
+
}
|
|
897
|
+
this.splitter = new StreamingTextSplitter({
|
|
898
|
+
maxLength: this.config.maxSegmentLength || 150,
|
|
899
|
+
onSegmentComplete: (segment) => {
|
|
900
|
+
this.segmentQueue.push(segment);
|
|
901
|
+
if (this.state.isSessionStarted) {
|
|
902
|
+
this.processQueue();
|
|
903
|
+
}
|
|
689
904
|
}
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
};
|
|
708
|
-
utterance.onerror = (e) => {
|
|
709
|
-
console.error("[useMessageTTS] Fallback TTS failed", e);
|
|
710
|
-
setErrorState("Fallback TTS failed");
|
|
711
|
-
onError?.(new Error("Fallback TTS failed"));
|
|
712
|
-
setIsPlaying(false);
|
|
713
|
-
};
|
|
714
|
-
fallbackUtteranceRef.current = utterance;
|
|
715
|
-
window.speechSynthesis.speak(utterance);
|
|
716
|
-
},
|
|
717
|
-
[audioParams, instanceId, onError, onPlayEnd, onPlayStart, pause]
|
|
718
|
-
);
|
|
719
|
-
const executeTTS = (0, import_react3.useCallback)(
|
|
720
|
-
async (text, targetVoice) => {
|
|
721
|
-
stop();
|
|
722
|
-
stopOthers();
|
|
723
|
-
setErrorState(null);
|
|
724
|
-
setIsSynthesizing(true);
|
|
725
|
-
setProgress(0);
|
|
726
|
-
audioBuffersRef.current = [];
|
|
727
|
-
isFallbackRef.current = false;
|
|
728
|
-
const speed = audioParams?.speech_rate || 0;
|
|
729
|
-
const voice = targetVoice;
|
|
730
|
-
const cacheKey = TTSCache.generateKey(text, voice, speed);
|
|
731
|
-
cacheKeyRef.current = cacheKey;
|
|
732
|
-
const startTime = Date.now();
|
|
733
|
-
metricsCollector.record({
|
|
734
|
-
name: "tts_request",
|
|
735
|
-
labels: { voice, speed, text_length: text.length },
|
|
736
|
-
value: 1,
|
|
737
|
-
timestamp: startTime
|
|
738
|
-
});
|
|
905
|
+
});
|
|
906
|
+
this.client = (0, import_tts2.WebsocketMSE)({ autoStartSession: false });
|
|
907
|
+
const { ttsConfig, audioParams } = this.config;
|
|
908
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
909
|
+
const startTime = Date.now();
|
|
910
|
+
this.config.metricsCollector?.record({
|
|
911
|
+
name: "tts_request",
|
|
912
|
+
labels: { voice, text_length: 0 },
|
|
913
|
+
value: 1,
|
|
914
|
+
timestamp: startTime
|
|
915
|
+
});
|
|
916
|
+
return new Promise((resolve, reject) => {
|
|
917
|
+
const timeoutId = setTimeout(() => {
|
|
918
|
+
const err = new Error("WebSocket connection timeout (10s)");
|
|
919
|
+
this.updateState({ error: err.message });
|
|
920
|
+
reject(err);
|
|
921
|
+
}, 1e4);
|
|
739
922
|
try {
|
|
740
|
-
const
|
|
741
|
-
const audio = new Audio();
|
|
742
|
-
audio.crossOrigin = "anonymous";
|
|
743
|
-
audioRef.current = audio;
|
|
744
|
-
audio.onplay = () => {
|
|
745
|
-
setIsPlaying(true);
|
|
746
|
-
setIsPaused(false);
|
|
747
|
-
onPlayStart?.();
|
|
748
|
-
initAudioContext();
|
|
749
|
-
activeInstances.set(instanceId, { pause });
|
|
750
|
-
metricsCollector.record({
|
|
751
|
-
name: "tts_latency",
|
|
752
|
-
labels: { stage: "playback", voice, speed },
|
|
753
|
-
value: Date.now() - startTime,
|
|
754
|
-
timestamp: Date.now()
|
|
755
|
-
});
|
|
756
|
-
};
|
|
757
|
-
audio.onpause = () => {
|
|
758
|
-
if (!audio.ended) {
|
|
759
|
-
}
|
|
760
|
-
};
|
|
761
|
-
audio.onended = () => {
|
|
762
|
-
setIsPlaying(false);
|
|
763
|
-
setIsPaused(false);
|
|
764
|
-
onPlayEnd?.();
|
|
765
|
-
activeInstances.delete(instanceId);
|
|
766
|
-
};
|
|
767
|
-
audio.onerror = (e) => {
|
|
768
|
-
console.error("Audio playback error:", e, audio.error);
|
|
769
|
-
metricsCollector.record({
|
|
770
|
-
name: "tts_error",
|
|
771
|
-
labels: {
|
|
772
|
-
error_code: "playback_error",
|
|
773
|
-
voice,
|
|
774
|
-
detail: audio.error?.message || String(audio.error?.code)
|
|
775
|
-
},
|
|
776
|
-
value: 1,
|
|
777
|
-
timestamp: Date.now()
|
|
778
|
-
});
|
|
779
|
-
handleError(text, voice);
|
|
780
|
-
};
|
|
781
|
-
audio.ontimeupdate = () => {
|
|
782
|
-
let duration = audio.duration;
|
|
783
|
-
if (!isFinite(duration)) {
|
|
784
|
-
if (audio.buffered.length > 0) {
|
|
785
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
786
|
-
}
|
|
787
|
-
}
|
|
788
|
-
if (isFinite(duration) && duration > 0) {
|
|
789
|
-
setProgress(audio.currentTime / duration * 100);
|
|
790
|
-
}
|
|
791
|
-
};
|
|
792
|
-
if (cachedData) {
|
|
793
|
-
const totalSize = cachedData.reduce(
|
|
794
|
-
(acc, buf) => acc + buf.byteLength,
|
|
795
|
-
0
|
|
796
|
-
);
|
|
797
|
-
metricsCollector.record({
|
|
798
|
-
name: "tts_cache_hit",
|
|
799
|
-
labels: { voice, speed },
|
|
800
|
-
value: 1,
|
|
801
|
-
timestamp: Date.now()
|
|
802
|
-
});
|
|
803
|
-
console.log(
|
|
804
|
-
JSON.stringify({
|
|
805
|
-
event: "tts_cache_hit",
|
|
806
|
-
cache_hit: true,
|
|
807
|
-
text_len: text.length,
|
|
808
|
-
voice,
|
|
809
|
-
speed,
|
|
810
|
-
data_size: totalSize
|
|
811
|
-
})
|
|
812
|
-
);
|
|
813
|
-
if (totalSize === 0) {
|
|
814
|
-
console.warn(
|
|
815
|
-
"[useMessageTTS] Cached data is empty, falling back to stream"
|
|
816
|
-
);
|
|
817
|
-
} else {
|
|
818
|
-
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
819
|
-
const url2 = URL.createObjectURL(blob);
|
|
820
|
-
audioUrlRef.current = url2;
|
|
821
|
-
audio.src = url2;
|
|
822
|
-
setIsSynthesizing(false);
|
|
823
|
-
if (autoPlay) {
|
|
824
|
-
try {
|
|
825
|
-
await audio.play();
|
|
826
|
-
} catch (err) {
|
|
827
|
-
console.warn("AutoPlay blocked", err);
|
|
828
|
-
}
|
|
829
|
-
}
|
|
830
|
-
return;
|
|
831
|
-
}
|
|
832
|
-
}
|
|
833
|
-
console.log("[useMessageTTS] Cache miss, starting stream");
|
|
834
|
-
clientRef.current = (0, import_tts2.WebsocketMSE)({ autoStartSession: true });
|
|
835
|
-
const formattedText = import_volcano_sdk2.MarkdownFormatter.format(text).replace(
|
|
836
|
-
(0, import_emoji_regex2.default)(),
|
|
837
|
-
""
|
|
838
|
-
);
|
|
839
|
-
const segments = splitTextByDelimiters(formattedText);
|
|
840
|
-
const url = clientRef.current.start({
|
|
923
|
+
const url = this.client.start({
|
|
841
924
|
url: buildFullUrl2(WS_URL, {
|
|
842
925
|
api_access_key: `Jwt; ${ttsConfig.token}`,
|
|
843
926
|
api_app_key: ttsConfig.appid,
|
|
844
927
|
api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
|
|
845
928
|
}),
|
|
846
929
|
config: {
|
|
847
|
-
user: {
|
|
848
|
-
uid: `req-${Date.now()}`
|
|
849
|
-
},
|
|
930
|
+
user: { uid: `req-${Date.now()}` },
|
|
850
931
|
namespace: ttsConfig.namespace || "BidirectionalTTS",
|
|
851
932
|
req_params: {
|
|
852
933
|
speaker: voice,
|
|
@@ -861,456 +942,362 @@ function useMessageTTS({
|
|
|
861
942
|
enable_language_detector: true,
|
|
862
943
|
disable_markdown_filter: true,
|
|
863
944
|
enable_latex_tn: true
|
|
864
|
-
// max_length_to_filter_parenthesis: 100,
|
|
865
945
|
})
|
|
866
946
|
}
|
|
867
947
|
},
|
|
948
|
+
onStart: () => {
|
|
949
|
+
this.updateState({ isConnected: true });
|
|
950
|
+
},
|
|
951
|
+
onConnectionReady: () => {
|
|
952
|
+
clearTimeout(timeoutId);
|
|
953
|
+
resolve();
|
|
954
|
+
},
|
|
868
955
|
onSessionStarted: () => {
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
956
|
+
this.updateState({ isSessionStarted: true });
|
|
957
|
+
this.isSessionStarting = false;
|
|
958
|
+
if (this.segmentQueue.length > 0) {
|
|
959
|
+
this.processQueue();
|
|
960
|
+
}
|
|
873
961
|
},
|
|
874
962
|
onMessage: (data) => {
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
);
|
|
963
|
+
this.updateState({ isSynthesizing: true });
|
|
964
|
+
if (this.sessionAudioBuffers.length === 0) {
|
|
965
|
+
this.config.metricsCollector?.record({
|
|
966
|
+
name: "tts_latency",
|
|
967
|
+
labels: { stage: "first_packet", voice },
|
|
968
|
+
value: Date.now() - startTime,
|
|
969
|
+
timestamp: Date.now()
|
|
970
|
+
});
|
|
883
971
|
}
|
|
884
972
|
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
885
|
-
|
|
973
|
+
this.sessionAudioBuffers.push(buffer);
|
|
886
974
|
},
|
|
887
975
|
onSessionFinished: () => {
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
}
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
text_len: text.length,
|
|
897
|
-
duration_ms: Date.now() - startTime,
|
|
976
|
+
this.updateState({
|
|
977
|
+
isSynthesizing: false,
|
|
978
|
+
isSessionStarted: false
|
|
979
|
+
});
|
|
980
|
+
if (this.sessionAudioBuffers.length > 0 && this.streamText) {
|
|
981
|
+
const speed = audioParams?.speech_rate || 0;
|
|
982
|
+
const cacheKey = TTSCache.generateKey(
|
|
983
|
+
this.streamText,
|
|
898
984
|
voice,
|
|
899
985
|
speed
|
|
900
|
-
|
|
901
|
-
|
|
986
|
+
);
|
|
987
|
+
TTSCache.set(cacheKey, [...this.sessionAudioBuffers]);
|
|
988
|
+
}
|
|
989
|
+
this.config.metricsCollector?.record({
|
|
990
|
+
name: "tts_synthesis_finished",
|
|
991
|
+
labels: { voice, text_length: this.streamText.length },
|
|
992
|
+
value: Date.now() - startTime,
|
|
993
|
+
timestamp: Date.now()
|
|
994
|
+
});
|
|
902
995
|
},
|
|
903
996
|
onError: (err) => {
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
997
|
+
if (!this.state.isConnected) {
|
|
998
|
+
clearTimeout(timeoutId);
|
|
999
|
+
reject(new Error(err.msg || "TTS error"));
|
|
1000
|
+
}
|
|
1001
|
+
console.error("[PlaybackSession] TTS error:", err);
|
|
1002
|
+
this.updateState({
|
|
1003
|
+
error: err.msg || "TTS error",
|
|
1004
|
+
isSynthesizing: false
|
|
910
1005
|
});
|
|
911
|
-
|
|
912
|
-
|
|
1006
|
+
this.config.onError?.(new Error(err.msg || "TTS error"));
|
|
1007
|
+
},
|
|
1008
|
+
onWSError: (err) => {
|
|
1009
|
+
if (!this.state.isConnected) {
|
|
1010
|
+
clearTimeout(timeoutId);
|
|
1011
|
+
reject(err instanceof Error ? err : new Error("WebSocket error"));
|
|
1012
|
+
}
|
|
913
1013
|
}
|
|
914
1014
|
});
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
1015
|
+
if (this.audioUrl) {
|
|
1016
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1017
|
+
}
|
|
1018
|
+
this.audioUrl = url;
|
|
1019
|
+
this.audio.src = url;
|
|
1020
|
+
if (this.config.autoPlay !== false) {
|
|
1021
|
+
this.audio.play().catch(
|
|
1022
|
+
(e) => console.warn("[PlaybackSession] Autoplay blocked:", e)
|
|
1023
|
+
);
|
|
923
1024
|
}
|
|
924
1025
|
} catch (err) {
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
timestamp: Date.now()
|
|
931
|
-
});
|
|
932
|
-
handleError(text, voice);
|
|
933
|
-
}
|
|
934
|
-
},
|
|
935
|
-
[
|
|
936
|
-
ttsConfig,
|
|
937
|
-
audioParams,
|
|
938
|
-
autoPlay,
|
|
939
|
-
stop,
|
|
940
|
-
stopOthers,
|
|
941
|
-
instanceId,
|
|
942
|
-
onPlayStart,
|
|
943
|
-
onPlayEnd,
|
|
944
|
-
initAudioContext,
|
|
945
|
-
pause,
|
|
946
|
-
fallbackVoice,
|
|
947
|
-
metricsCollector
|
|
948
|
-
]
|
|
949
|
-
);
|
|
950
|
-
const handleError = (0, import_react3.useCallback)(
|
|
951
|
-
(text, failedVoice) => {
|
|
952
|
-
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
953
|
-
console.warn(
|
|
954
|
-
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
1026
|
+
clearTimeout(timeoutId);
|
|
1027
|
+
console.error("[PlaybackSession] Connect error:", err);
|
|
1028
|
+
this.updateState({ error: String(err) });
|
|
1029
|
+
this.config.onError?.(
|
|
1030
|
+
err instanceof Error ? err : new Error(String(err))
|
|
955
1031
|
);
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
1032
|
+
reject(err);
|
|
1033
|
+
}
|
|
1034
|
+
});
|
|
1035
|
+
}
|
|
1036
|
+
/**
|
|
1037
|
+
* 发送流式文本
|
|
1038
|
+
*/
|
|
1039
|
+
handleStreamChunk(chunk) {
|
|
1040
|
+
if (!chunk) return;
|
|
1041
|
+
this.streamText += chunk;
|
|
1042
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting && this.client && this.state.isConnected && !this.isSessionFinished) {
|
|
1043
|
+
this.isSessionStarting = true;
|
|
1044
|
+
this.client.startSession();
|
|
1045
|
+
}
|
|
1046
|
+
this.splitter?.onChunk(chunk);
|
|
1047
|
+
if (this.state.isSessionStarted) {
|
|
1048
|
+
this.processQueue();
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
/**
|
|
1052
|
+
* 结束流式输入
|
|
1053
|
+
*/
|
|
1054
|
+
async finishStream() {
|
|
1055
|
+
this.isStreamFinished = true;
|
|
1056
|
+
this.updateState({ isStreamFinished: true });
|
|
1057
|
+
this.splitter?.complete();
|
|
1058
|
+
if (this.state.isSessionStarted) {
|
|
1059
|
+
this.processQueue();
|
|
1060
|
+
}
|
|
1061
|
+
if (this.segmentQueue.length > 0 || this.isSending) {
|
|
1062
|
+
await new Promise((resolve) => {
|
|
1063
|
+
this.resolveAllSegmentsSent = resolve;
|
|
1064
|
+
});
|
|
1065
|
+
} else if (this.client && this.state.isSessionStarted && !this.isSessionFinished) {
|
|
1066
|
+
this.isSessionFinished = true;
|
|
1067
|
+
this.client.finishSession();
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
/**
|
|
1071
|
+
* 处理非流式播放(直接播放整段文本)
|
|
1072
|
+
*/
|
|
1073
|
+
async play(text) {
|
|
1074
|
+
const formattedText = import_volcano_sdk3.MarkdownFormatter.format(text).replace(
|
|
1075
|
+
(0, import_emoji_regex3.default)(),
|
|
1076
|
+
""
|
|
1077
|
+
);
|
|
1078
|
+
const { audioParams } = this.config;
|
|
1079
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
1080
|
+
const speed = audioParams?.speech_rate || 0;
|
|
1081
|
+
const cacheKey = TTSCache.generateKey(formattedText, voice, speed);
|
|
1082
|
+
const cachedData = await TTSCache.get(cacheKey);
|
|
1083
|
+
if (cachedData && cachedData.length > 0) {
|
|
1084
|
+
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
1085
|
+
const url = URL.createObjectURL(blob);
|
|
1086
|
+
if (this.audioUrl) URL.revokeObjectURL(this.audioUrl);
|
|
1087
|
+
this.audioUrl = url;
|
|
1088
|
+
this.audio.src = url;
|
|
1089
|
+
this.updateState({ isSynthesizing: false });
|
|
1090
|
+
if (this.config.autoPlay !== false) {
|
|
1091
|
+
try {
|
|
1092
|
+
await this.audio.play();
|
|
1093
|
+
} catch (e) {
|
|
1094
|
+
console.warn("Autoplay blocked", e);
|
|
963
1095
|
}
|
|
964
|
-
executeTTS(text, fallbackVoice);
|
|
965
|
-
} else {
|
|
966
|
-
playFallback(text);
|
|
967
1096
|
}
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
(
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1097
|
+
return;
|
|
1098
|
+
}
|
|
1099
|
+
await this.connect();
|
|
1100
|
+
this.streamText = formattedText;
|
|
1101
|
+
const segments = splitTextByDelimiters(formattedText);
|
|
1102
|
+
if (this.state.isConnected) {
|
|
1103
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting) {
|
|
1104
|
+
this.isSessionStarting = true;
|
|
1105
|
+
this.client?.startSession();
|
|
1106
|
+
}
|
|
1107
|
+
}
|
|
1108
|
+
segments.forEach((seg, idx) => {
|
|
1109
|
+
this.segmentQueue.push({
|
|
1110
|
+
index: idx,
|
|
1111
|
+
content: seg.content,
|
|
1112
|
+
length: seg.content.length,
|
|
1113
|
+
sent: false
|
|
1114
|
+
});
|
|
1115
|
+
});
|
|
1116
|
+
if (this.state.isSessionStarted) {
|
|
1117
|
+
this.processQueue();
|
|
1118
|
+
}
|
|
1119
|
+
await this.finishStream();
|
|
1120
|
+
}
|
|
1121
|
+
processQueue() {
|
|
1122
|
+
if (!this.client || !this.state.isSessionStarted || this.isSending || this.isSessionFinished) {
|
|
1123
|
+
return;
|
|
1124
|
+
}
|
|
1125
|
+
if (this.segmentQueue.length === 0) {
|
|
1126
|
+
if (this.isStreamFinished && !this.isSessionFinished) {
|
|
1127
|
+
this.isSessionFinished = true;
|
|
1128
|
+
this.client.finishSession();
|
|
1129
|
+
this.resolveAllSegmentsSent?.();
|
|
1130
|
+
}
|
|
1131
|
+
return;
|
|
1132
|
+
}
|
|
1133
|
+
this.isSending = true;
|
|
1134
|
+
const segment = this.segmentQueue.shift();
|
|
1135
|
+
this.client.sendText(segment.content);
|
|
1136
|
+
segment.sent = true;
|
|
1137
|
+
this.isSending = false;
|
|
1138
|
+
setTimeout(() => this.processQueue(), 0);
|
|
1139
|
+
}
|
|
1140
|
+
pause() {
|
|
1141
|
+
this.audio.pause();
|
|
1142
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
1143
|
+
}
|
|
1144
|
+
resume() {
|
|
1145
|
+
this.audio.play();
|
|
1146
|
+
this.updateState({ isPaused: false, isPlaying: true });
|
|
1147
|
+
}
|
|
1148
|
+
stop() {
|
|
1149
|
+
if (this.client) {
|
|
1150
|
+
this.client.close();
|
|
1151
|
+
this.client = null;
|
|
1152
|
+
}
|
|
1153
|
+
this.audio.pause();
|
|
1154
|
+
this.audio.currentTime = 0;
|
|
1155
|
+
if (this.audioUrl) {
|
|
1156
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1157
|
+
this.audioUrl = null;
|
|
1158
|
+
}
|
|
1159
|
+
this.stopVisualizationLoop();
|
|
1160
|
+
this.audioContext?.close();
|
|
1161
|
+
this.audioContext = null;
|
|
1162
|
+
this.updateState({
|
|
1163
|
+
isPlaying: false,
|
|
1164
|
+
isPaused: false,
|
|
1165
|
+
isSynthesizing: false,
|
|
1166
|
+
progress: 0,
|
|
1167
|
+
isConnected: false,
|
|
1168
|
+
isSessionStarted: false
|
|
1169
|
+
});
|
|
1170
|
+
}
|
|
1171
|
+
seek(percentage) {
|
|
1172
|
+
let duration = this.audio.duration;
|
|
1173
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
1174
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
1175
|
+
}
|
|
1176
|
+
if (isFinite(duration) && duration > 0) {
|
|
1177
|
+
const time = percentage / 100 * duration;
|
|
1178
|
+
if (isFinite(time)) {
|
|
1179
|
+
this.audio.currentTime = time;
|
|
1180
|
+
this.updateState({ progress: percentage });
|
|
1181
|
+
}
|
|
1182
|
+
}
|
|
1183
|
+
}
|
|
1184
|
+
updateState(partial) {
|
|
1185
|
+
this.state = { ...this.state, ...partial };
|
|
1186
|
+
this.notifyListeners();
|
|
1187
|
+
}
|
|
1188
|
+
subscribe(listener) {
|
|
1189
|
+
this.listeners.add(listener);
|
|
1190
|
+
listener(this.state);
|
|
1191
|
+
return () => this.listeners.delete(listener);
|
|
1192
|
+
}
|
|
1193
|
+
notifyListeners() {
|
|
1194
|
+
this.listeners.forEach((l) => l(this.state));
|
|
1195
|
+
}
|
|
1196
|
+
// Visualization
|
|
1197
|
+
getFrequencyData() {
|
|
1198
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1199
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1200
|
+
this.analyser.getByteFrequencyData(data);
|
|
1201
|
+
return data;
|
|
1202
|
+
}
|
|
1203
|
+
getTimeDomainData() {
|
|
1204
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1205
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1206
|
+
this.analyser.getByteTimeDomainData(data);
|
|
1207
|
+
return data;
|
|
1208
|
+
}
|
|
1209
|
+
startVisualizationLoop() {
|
|
1210
|
+
if (!this.config.visualization?.enabled) return;
|
|
995
1211
|
const update = (timestamp) => {
|
|
996
|
-
if (isPlaying && !isPaused) {
|
|
997
|
-
if (timestamp -
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1212
|
+
if (this.state.isPlaying && !this.state.isPaused) {
|
|
1213
|
+
if (timestamp - this.lastVisUpdate >= (this.config.visualization?.refreshInterval || 0)) {
|
|
1214
|
+
this.updateState({
|
|
1215
|
+
visualizationData: {
|
|
1216
|
+
frequencyData: this.getFrequencyData(),
|
|
1217
|
+
timeDomainData: this.getTimeDomainData()
|
|
1218
|
+
}
|
|
1001
1219
|
});
|
|
1002
|
-
|
|
1220
|
+
this.lastVisUpdate = timestamp;
|
|
1003
1221
|
}
|
|
1004
|
-
animId = requestAnimationFrame(update);
|
|
1222
|
+
this.animId = requestAnimationFrame(update);
|
|
1005
1223
|
}
|
|
1006
1224
|
};
|
|
1007
|
-
|
|
1008
|
-
|
|
1225
|
+
this.animId = requestAnimationFrame(update);
|
|
1226
|
+
}
|
|
1227
|
+
stopVisualizationLoop() {
|
|
1228
|
+
if (this.animId) {
|
|
1229
|
+
cancelAnimationFrame(this.animId);
|
|
1230
|
+
this.animId = null;
|
|
1009
1231
|
}
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1232
|
+
}
|
|
1233
|
+
};
|
|
1234
|
+
/**
 * Registry of playback sessions keyed by stream id.
 *
 * Tracks which stream is the active one and pauses the previously active
 * stream when a different one is created or resumed.
 */
var StreamPlaybackManagerImpl = class {
  constructor() {
    // Sessions by stream id.
    this.sessions = /* @__PURE__ */ new Map();
    // Id of the stream most recently created or resumed, or null.
    this.activeStreamId = null;
  }
  /**
   * Create a new playback session and make it the active one.
   *
   * A different active stream is paused first. A session already registered
   * under the same id is stopped before it is replaced; otherwise it would
   * be dropped from the registry without stop() ever being called on it.
   *
   * @param {string} id - Stream id to register the session under.
   * @param {object} config - Configuration passed to the PlaybackSession.
   * @returns {PlaybackSession} The newly created session.
   */
  createSession(id, config) {
    if (this.activeStreamId && this.activeStreamId !== id) {
      this.pause(this.activeStreamId);
    }
    this.sessions.get(id)?.stop();
    const session = new PlaybackSession(id, config);
    this.sessions.set(id, session);
    this.activeStreamId = id;
    return session;
  }
  /**
   * Look up a session.
   *
   * @param {string} id - Stream id.
   * @returns {PlaybackSession | undefined} The session, if registered.
   */
  getSession(id) {
    return this.sessions.get(id);
  }
  /**
   * Stop a session and remove it from the registry. Unknown ids are ignored.
   *
   * @param {string} id - Stream id.
   */
  stop(id) {
    const session = this.sessions.get(id);
    if (!session) {
      return;
    }
    session.stop();
    this.sessions.delete(id);
    if (this.activeStreamId === id) {
      this.activeStreamId = null;
    }
  }
  /**
   * Pause a session. Unknown ids are ignored.
   *
   * @param {string} id - Stream id.
   */
  pause(id) {
    this.sessions.get(id)?.pause();
  }
  /**
   * Resume a session and make it the active one, pausing a different active
   * stream first.
   *
   * Unknown ids are ignored entirely, so a stale id can neither pause the
   * stream that is currently playing nor become the active id.
   *
   * @param {string} id - Stream id.
   */
  resume(id) {
    const session = this.sessions.get(id);
    if (!session) {
      return;
    }
    if (this.activeStreamId && this.activeStreamId !== id) {
      this.pause(this.activeStreamId);
    }
    session.resume();
    this.activeStreamId = id;
  }
  // Note carried over from the original source: a "register" method kept for
  // compatibility with the old API was considered (createSession is the
  // recommended entry point). Aliases could be kept for the legacy
  // useMessageTTS logic, but since the hook is being refactored the API may
  // change.
};
|
|
1291
|
+
// Module-level StreamPlaybackManagerImpl instance; this is the value the
// package exports under the name `StreamPlaybackManager`.
var StreamPlaybackManager = new StreamPlaybackManagerImpl();
|
|
1299
1292
|
|
|
1300
|
-
// src/tts/
|
|
1301
|
-
var
|
|
1302
|
-
|
|
1303
|
-
var sessionAudioCache = /* @__PURE__ */ new Map();
|
|
1304
|
-
function buildFullUrl3(url, params) {
|
|
1305
|
-
const arr = [];
|
|
1306
|
-
for (const key in params) {
|
|
1307
|
-
if (Object.prototype.hasOwnProperty.call(params, key)) {
|
|
1308
|
-
arr.push(`${key}=${encodeURIComponent(params[key])}`);
|
|
1309
|
-
}
|
|
1293
|
+
// src/tts/Metrics.ts
|
|
1294
|
+
/**
 * Metrics collector that discards every metric it is given.
 */
var NoopMetricsCollector = class {
  /**
   * Accept a metric and do nothing with it.
   *
   * @param {object} _metric - Ignored.
   * @returns {void}
   */
  record(_metric) {
    // Intentionally empty.
  }
};
|
|
1298
|
+
|
|
1299
|
+
// src/tts/useMessageTTS.ts
|
|
1300
|
+
function useMessageTTS({
|
|
1314
1301
|
ttsConfig,
|
|
1315
1302
|
audioParams,
|
|
1316
1303
|
autoPlay = true,
|
|
@@ -1319,431 +1306,427 @@ function useStreamTTS({
|
|
|
1319
1306
|
onPlayPause,
|
|
1320
1307
|
onPlayResume,
|
|
1321
1308
|
onPlayEnd,
|
|
1309
|
+
onStop,
|
|
1322
1310
|
onError,
|
|
1311
|
+
fallbackVoice,
|
|
1323
1312
|
visualization,
|
|
1324
|
-
|
|
1313
|
+
streamId: externalStreamId
|
|
1325
1314
|
}) {
|
|
1326
|
-
const
|
|
1327
|
-
const [
|
|
1328
|
-
const [
|
|
1329
|
-
const
|
|
1330
|
-
const [
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1315
|
+
const isSubscriptionMode = !!externalStreamId;
|
|
1316
|
+
const [internalStreamId, setInternalStreamId] = (0, import_react3.useState)("");
|
|
1317
|
+
const [isSwitchedToIndependent, setIsSwitchedToIndependent] = (0, import_react3.useState)(false);
|
|
1318
|
+
const streamId = isSwitchedToIndependent ? internalStreamId : externalStreamId || internalStreamId;
|
|
1319
|
+
const [state, setState] = (0, import_react3.useState)({
|
|
1320
|
+
isPlaying: false,
|
|
1321
|
+
isPaused: false,
|
|
1322
|
+
isSynthesizing: false,
|
|
1323
|
+
progress: 0,
|
|
1324
|
+
visualizationData: {
|
|
1325
|
+
frequencyData: new Uint8Array(0),
|
|
1326
|
+
timeDomainData: new Uint8Array(0)
|
|
1327
|
+
},
|
|
1328
|
+
error: null,
|
|
1329
|
+
isConnected: false,
|
|
1330
|
+
isSessionStarted: false,
|
|
1331
|
+
isStreamFinished: false
|
|
1337
1332
|
});
|
|
1338
|
-
const
|
|
1339
|
-
const
|
|
1340
|
-
const
|
|
1341
|
-
const
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
const isStreamFinishedRef = (0, import_react4.useRef)(false);
|
|
1354
|
-
const isSessionFinishedRef = (0, import_react4.useRef)(false);
|
|
1355
|
-
const resolveAllSegmentsSentRef = (0, import_react4.useRef)(null);
|
|
1356
|
-
const currentVoiceRef = (0, import_react4.useRef)("");
|
|
1357
|
-
const initAudioContext = (0, import_react4.useCallback)(() => {
|
|
1358
|
-
if (!audioRef.current) return;
|
|
1359
|
-
if (!audioContextRef.current) {
|
|
1360
|
-
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
1361
|
-
audioContextRef.current = new AudioContextClass();
|
|
1362
|
-
}
|
|
1363
|
-
if (audioContextRef.current.state === "suspended") {
|
|
1364
|
-
audioContextRef.current.resume();
|
|
1365
|
-
}
|
|
1366
|
-
if (!analyserRef.current) {
|
|
1367
|
-
analyserRef.current = audioContextRef.current.createAnalyser();
|
|
1368
|
-
analyserRef.current.fftSize = visualization?.fftSize || 256;
|
|
1369
|
-
}
|
|
1370
|
-
if (!sourceRef.current) {
|
|
1371
|
-
try {
|
|
1372
|
-
sourceRef.current = audioContextRef.current.createMediaElementSource(audioRef.current);
|
|
1373
|
-
sourceRef.current.connect(analyserRef.current);
|
|
1374
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
1375
|
-
} catch (e) {
|
|
1376
|
-
}
|
|
1333
|
+
const [error, setErrorState] = (0, import_react3.useState)(null);
|
|
1334
|
+
const isFallbackRef = (0, import_react3.useRef)(false);
|
|
1335
|
+
const fallbackUtteranceRef = (0, import_react3.useRef)(null);
|
|
1336
|
+
const currentTextRef = (0, import_react3.useRef)("");
|
|
1337
|
+
(0, import_react3.useEffect)(() => {
|
|
1338
|
+
if (!streamId) return;
|
|
1339
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1340
|
+
if (session) {
|
|
1341
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1342
|
+
setState(newState);
|
|
1343
|
+
if (newState.error) setErrorState(newState.error);
|
|
1344
|
+
});
|
|
1345
|
+
return () => {
|
|
1346
|
+
unsubscribe();
|
|
1347
|
+
};
|
|
1377
1348
|
}
|
|
1378
|
-
}, [
|
|
1379
|
-
const
|
|
1380
|
-
if (
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
audioRef.current.onerror = null;
|
|
1386
|
-
audioRef.current.onended = null;
|
|
1387
|
-
audioRef.current.onpause = null;
|
|
1388
|
-
audioRef.current.onplay = null;
|
|
1389
|
-
audioRef.current.ontimeupdate = null;
|
|
1390
|
-
audioRef.current.pause();
|
|
1391
|
-
audioRef.current.src = "";
|
|
1392
|
-
audioRef.current = null;
|
|
1393
|
-
}
|
|
1394
|
-
if (sourceRef.current) {
|
|
1395
|
-
try {
|
|
1396
|
-
sourceRef.current.disconnect();
|
|
1397
|
-
} catch (e) {
|
|
1349
|
+
}, [streamId]);
|
|
1350
|
+
const stop = (0, import_react3.useCallback)(() => {
|
|
1351
|
+
if (streamId) {
|
|
1352
|
+
StreamPlaybackManager.stop(streamId);
|
|
1353
|
+
if (!isSubscriptionMode || isSwitchedToIndependent) {
|
|
1354
|
+
setInternalStreamId("");
|
|
1355
|
+
setIsSwitchedToIndependent(false);
|
|
1398
1356
|
}
|
|
1399
|
-
sourceRef.current = null;
|
|
1400
1357
|
}
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
if (id !== instanceId) {
|
|
1405
|
-
instance.pause();
|
|
1406
|
-
}
|
|
1407
|
-
});
|
|
1408
|
-
}, [instanceId]);
|
|
1409
|
-
const pause = (0, import_react4.useCallback)(() => {
|
|
1410
|
-
if (audioRef.current) {
|
|
1411
|
-
audioRef.current.pause();
|
|
1358
|
+
if (fallbackUtteranceRef.current) {
|
|
1359
|
+
window.speechSynthesis.cancel();
|
|
1360
|
+
fallbackUtteranceRef.current = null;
|
|
1412
1361
|
}
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
if (
|
|
1429
|
-
|
|
1362
|
+
isFallbackRef.current = false;
|
|
1363
|
+
setState((prev) => ({
|
|
1364
|
+
...prev,
|
|
1365
|
+
isPlaying: false,
|
|
1366
|
+
isPaused: false,
|
|
1367
|
+
isSynthesizing: false,
|
|
1368
|
+
progress: 0
|
|
1369
|
+
}));
|
|
1370
|
+
onStop?.();
|
|
1371
|
+
}, [streamId, isSubscriptionMode, isSwitchedToIndependent, onStop]);
|
|
1372
|
+
const pause = (0, import_react3.useCallback)(() => {
|
|
1373
|
+
if (isFallbackRef.current) {
|
|
1374
|
+
window.speechSynthesis.pause();
|
|
1375
|
+
setState((prev) => ({ ...prev, isPaused: true, isPlaying: false }));
|
|
1376
|
+
onPlayPause?.();
|
|
1377
|
+
} else if (streamId) {
|
|
1378
|
+
StreamPlaybackManager.pause(streamId);
|
|
1430
1379
|
}
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1380
|
+
}, [streamId, onPlayPause]);
|
|
1381
|
+
const resume = (0, import_react3.useCallback)(() => {
|
|
1382
|
+
if (isFallbackRef.current) {
|
|
1383
|
+
window.speechSynthesis.resume();
|
|
1384
|
+
setState((prev) => ({ ...prev, isPaused: false, isPlaying: true }));
|
|
1385
|
+
onPlayResume?.();
|
|
1386
|
+
} else if (streamId) {
|
|
1387
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1388
|
+
if (session) {
|
|
1389
|
+
StreamPlaybackManager.resume(streamId);
|
|
1390
|
+
} else {
|
|
1391
|
+
console.log(
|
|
1392
|
+
"[useMessageTTS] Session not found, resetting pause state"
|
|
1393
|
+
);
|
|
1394
|
+
setState((prev) => ({ ...prev, isPaused: false, isPlaying: false }));
|
|
1437
1395
|
}
|
|
1438
|
-
return;
|
|
1439
1396
|
}
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
}, []);
|
|
1448
|
-
const
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
setIsSynthesizing(false);
|
|
1462
|
-
setProgress(0);
|
|
1463
|
-
activeInstances2.delete(instanceId);
|
|
1464
|
-
streamTextRef.current = "";
|
|
1465
|
-
setStreamText("");
|
|
1466
|
-
segmentQueueRef.current = [];
|
|
1467
|
-
isSendingRef.current = false;
|
|
1468
|
-
sessionAudioBuffersRef.current = [];
|
|
1469
|
-
isStreamFinishedRef.current = false;
|
|
1470
|
-
isSessionFinishedRef.current = false;
|
|
1471
|
-
splitterRef.current?.reset();
|
|
1472
|
-
}, [cleanupAudio, instanceId]);
|
|
1473
|
-
const connect = (0, import_react4.useCallback)(async () => {
|
|
1474
|
-
stop();
|
|
1475
|
-
setErrorState(null);
|
|
1476
|
-
setProgress(0);
|
|
1477
|
-
sessionAudioBuffersRef.current = [];
|
|
1478
|
-
isStreamFinishedRef.current = false;
|
|
1479
|
-
streamTextRef.current = "";
|
|
1480
|
-
setStreamText("");
|
|
1481
|
-
segmentQueueRef.current = [];
|
|
1482
|
-
isSendingRef.current = false;
|
|
1483
|
-
isSessionStartedRef.current = false;
|
|
1484
|
-
calledSessionStartedRef.current = false;
|
|
1485
|
-
setIsSessionStarted(false);
|
|
1486
|
-
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
1487
|
-
currentVoiceRef.current = voice;
|
|
1488
|
-
const startTime = Date.now();
|
|
1489
|
-
metricsCollector.record({
|
|
1490
|
-
name: "tts_request",
|
|
1491
|
-
labels: { voice, text_length: 0 },
|
|
1492
|
-
value: 1,
|
|
1493
|
-
timestamp: startTime
|
|
1494
|
-
});
|
|
1495
|
-
try {
|
|
1496
|
-
const audio = new Audio();
|
|
1497
|
-
audio.crossOrigin = "anonymous";
|
|
1498
|
-
audioRef.current = audio;
|
|
1499
|
-
audio.onplay = () => {
|
|
1500
|
-
setIsPlaying(true);
|
|
1501
|
-
setIsPaused(false);
|
|
1397
|
+
}, [streamId, onPlayResume]);
|
|
1398
|
+
const togglePlay = (0, import_react3.useCallback)(() => {
|
|
1399
|
+
if (state.isPlaying) {
|
|
1400
|
+
pause();
|
|
1401
|
+
} else {
|
|
1402
|
+
resume();
|
|
1403
|
+
}
|
|
1404
|
+
}, [state.isPlaying, pause, resume]);
|
|
1405
|
+
const playFallback = (0, import_react3.useCallback)(
|
|
1406
|
+
(text) => {
|
|
1407
|
+
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
1408
|
+
stop();
|
|
1409
|
+
isFallbackRef.current = true;
|
|
1410
|
+
setErrorState(null);
|
|
1411
|
+
const utterance = new SpeechSynthesisUtterance(text);
|
|
1412
|
+
utterance.rate = audioParams?.speech_rate || 1;
|
|
1413
|
+
const voices = window.speechSynthesis.getVoices();
|
|
1414
|
+
const zhVoice = voices.find((v) => v.lang.includes("zh"));
|
|
1415
|
+
if (zhVoice) utterance.voice = zhVoice;
|
|
1416
|
+
utterance.onstart = () => {
|
|
1417
|
+
setState((prev) => ({ ...prev, isPlaying: true, isPaused: false }));
|
|
1502
1418
|
onPlayStart?.();
|
|
1503
|
-
initAudioContext();
|
|
1504
|
-
activeInstances2.set(instanceId, { pause });
|
|
1505
1419
|
};
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1420
|
+
utterance.onend = () => {
|
|
1421
|
+
setState((prev) => ({
|
|
1422
|
+
...prev,
|
|
1423
|
+
isPlaying: false,
|
|
1424
|
+
isPaused: false,
|
|
1425
|
+
progress: 100
|
|
1426
|
+
}));
|
|
1509
1427
|
onPlayEnd?.();
|
|
1510
|
-
activeInstances2.delete(instanceId);
|
|
1511
1428
|
};
|
|
1512
|
-
|
|
1513
|
-
console.error("[
|
|
1514
|
-
setErrorState(
|
|
1515
|
-
onError?.(new Error(
|
|
1516
|
-
};
|
|
1517
|
-
audio.ontimeupdate = () => {
|
|
1518
|
-
let duration = audio.duration;
|
|
1519
|
-
if (!isFinite(duration) && audio.buffered.length > 0) {
|
|
1520
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
1521
|
-
}
|
|
1522
|
-
if (isFinite(duration) && duration > 0) {
|
|
1523
|
-
setProgress(audio.currentTime / duration * 100);
|
|
1524
|
-
}
|
|
1429
|
+
utterance.onerror = (e) => {
|
|
1430
|
+
console.error("[useMessageTTS] Fallback TTS failed", e);
|
|
1431
|
+
setErrorState("Fallback TTS failed");
|
|
1432
|
+
onError?.(new Error("Fallback TTS failed"));
|
|
1525
1433
|
};
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1434
|
+
fallbackUtteranceRef.current = utterance;
|
|
1435
|
+
window.speechSynthesis.speak(utterance);
|
|
1436
|
+
},
|
|
1437
|
+
[audioParams, onError, onPlayEnd, onPlayStart, stop]
|
|
1438
|
+
);
|
|
1439
|
+
const handleError = (0, import_react3.useCallback)(
|
|
1440
|
+
(text, failedVoice) => {
|
|
1441
|
+
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
1442
|
+
console.warn(
|
|
1443
|
+
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
1444
|
+
);
|
|
1445
|
+
const newId = internalStreamId || `msg-tts-retry-${Date.now()}`;
|
|
1446
|
+
setInternalStreamId(newId);
|
|
1447
|
+
const session = StreamPlaybackManager.createSession(newId, {
|
|
1448
|
+
ttsConfig,
|
|
1449
|
+
audioParams: { ...audioParams, speaker: fallbackVoice },
|
|
1450
|
+
autoPlay,
|
|
1451
|
+
metricsCollector,
|
|
1452
|
+
visualization,
|
|
1453
|
+
onPlayStart,
|
|
1454
|
+
onPlayPause,
|
|
1455
|
+
onPlayResume,
|
|
1456
|
+
onPlayEnd,
|
|
1457
|
+
onError: () => playFallback(text)
|
|
1458
|
+
});
|
|
1459
|
+
session.play(text);
|
|
1460
|
+
} else {
|
|
1461
|
+
playFallback(text);
|
|
1462
|
+
}
|
|
1463
|
+
},
|
|
1464
|
+
[
|
|
1465
|
+
fallbackVoice,
|
|
1466
|
+
playFallback,
|
|
1467
|
+
ttsConfig,
|
|
1468
|
+
audioParams,
|
|
1469
|
+
autoPlay,
|
|
1470
|
+
metricsCollector,
|
|
1471
|
+
visualization,
|
|
1472
|
+
onPlayStart,
|
|
1473
|
+
onPlayPause,
|
|
1474
|
+
onPlayResume,
|
|
1475
|
+
onPlayEnd,
|
|
1476
|
+
internalStreamId
|
|
1477
|
+
]
|
|
1478
|
+
);
|
|
1479
|
+
const play = (0, import_react3.useCallback)(
|
|
1480
|
+
async (text) => {
|
|
1481
|
+
let shouldSwitchToIndependent = false;
|
|
1482
|
+
if (isSubscriptionMode) {
|
|
1483
|
+
const session2 = StreamPlaybackManager.getSession(externalStreamId || "");
|
|
1484
|
+
if (!session2) {
|
|
1485
|
+
console.log(
|
|
1486
|
+
"[useMessageTTS] Stream session not found, switching to independent play mode"
|
|
1487
|
+
);
|
|
1488
|
+
shouldSwitchToIndependent = true;
|
|
1489
|
+
setIsSwitchedToIndependent(true);
|
|
1490
|
+
} else if (session2.state.isStreamFinished) {
|
|
1491
|
+
console.log(
|
|
1492
|
+
"[useMessageTTS] Stream finished, switching to independent play mode"
|
|
1493
|
+
);
|
|
1494
|
+
shouldSwitchToIndependent = true;
|
|
1495
|
+
setIsSwitchedToIndependent(true);
|
|
1496
|
+
} else if (session2.state.isSynthesizing || session2.state.isPlaying) {
|
|
1497
|
+
console.warn(
|
|
1498
|
+
"[useMessageTTS] play() called in subscription mode while streaming, ignoring"
|
|
1499
|
+
);
|
|
1500
|
+
return;
|
|
1501
|
+
} else {
|
|
1502
|
+
console.log(
|
|
1503
|
+
"[useMessageTTS] Stream not active, switching to independent play mode"
|
|
1504
|
+
);
|
|
1505
|
+
shouldSwitchToIndependent = true;
|
|
1506
|
+
setIsSwitchedToIndependent(true);
|
|
1538
1507
|
}
|
|
1539
|
-
}
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
format: audioParams?.format || "mp3",
|
|
1556
|
-
speech_rate: audioParams?.speech_rate,
|
|
1557
|
-
pitch_rate: audioParams?.pitch_rate,
|
|
1558
|
-
loudness_rate: audioParams?.loudness_rate
|
|
1559
|
-
},
|
|
1560
|
-
additions: JSON.stringify({
|
|
1561
|
-
enable_language_detector: true,
|
|
1562
|
-
disable_markdown_filter: true,
|
|
1563
|
-
enable_latex_tn: true
|
|
1564
|
-
})
|
|
1565
|
-
}
|
|
1566
|
-
},
|
|
1567
|
-
// ===== 关键回调 =====
|
|
1568
|
-
onStart: () => {
|
|
1569
|
-
setIsConnected(true);
|
|
1570
|
-
isConnectedRef.current = true;
|
|
1571
|
-
console.log("[useStreamTTS] WebSocket connected, waiting for text...");
|
|
1572
|
-
},
|
|
1573
|
-
onSessionStarted: () => {
|
|
1574
|
-
setIsSessionStarted(true);
|
|
1575
|
-
isSessionStartedRef.current = true;
|
|
1576
|
-
console.log("[useStreamTTS] Session started, can send text now");
|
|
1577
|
-
if (segmentQueueRef.current.length > 0) {
|
|
1578
|
-
sendNextSegment();
|
|
1579
|
-
}
|
|
1580
|
-
},
|
|
1581
|
-
onMessage: (data) => {
|
|
1582
|
-
setIsSynthesizing(true);
|
|
1583
|
-
if (sessionAudioBuffersRef.current.length === 0) {
|
|
1584
|
-
metricsCollector.record({
|
|
1585
|
-
name: "tts_latency",
|
|
1586
|
-
labels: { stage: "first_packet", voice },
|
|
1587
|
-
value: Date.now() - startTime,
|
|
1588
|
-
timestamp: Date.now()
|
|
1589
|
-
});
|
|
1590
|
-
}
|
|
1591
|
-
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
1592
|
-
sessionAudioBuffersRef.current.push(buffer);
|
|
1593
|
-
},
|
|
1594
|
-
onSessionFinished: () => {
|
|
1595
|
-
setIsSynthesizing(false);
|
|
1596
|
-
setIsSessionStarted(false);
|
|
1597
|
-
isSessionStartedRef.current = false;
|
|
1598
|
-
calledSessionStartedRef.current = false;
|
|
1599
|
-
if (sessionAudioBuffersRef.current.length > 0 && streamTextRef.current) {
|
|
1600
|
-
const speed = audioParams?.speech_rate || 0;
|
|
1601
|
-
const cacheKey = TTSCache.generateKey(streamTextRef.current, voice, speed);
|
|
1602
|
-
TTSCache.set(cacheKey, [...sessionAudioBuffersRef.current]);
|
|
1603
|
-
sessionAudioCache.set(instanceId, {
|
|
1604
|
-
streamText: streamTextRef.current,
|
|
1605
|
-
audioBuffers: [...sessionAudioBuffersRef.current],
|
|
1606
|
-
timestamp: Date.now(),
|
|
1607
|
-
voice,
|
|
1608
|
-
speed
|
|
1609
|
-
});
|
|
1610
|
-
console.log(`[useStreamTTS] Session finished, cached ${sessionAudioBuffersRef.current.length} audio buffers`);
|
|
1611
|
-
}
|
|
1612
|
-
metricsCollector.record({
|
|
1613
|
-
name: "tts_synthesis_finished",
|
|
1614
|
-
labels: { voice, text_length: streamTextRef.current.length },
|
|
1615
|
-
value: Date.now() - startTime,
|
|
1616
|
-
timestamp: Date.now()
|
|
1617
|
-
});
|
|
1618
|
-
},
|
|
1508
|
+
}
|
|
1509
|
+
currentTextRef.current = text;
|
|
1510
|
+
stop();
|
|
1511
|
+
setErrorState(null);
|
|
1512
|
+
isFallbackRef.current = false;
|
|
1513
|
+
const id = `msg-tts-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1514
|
+
const session = StreamPlaybackManager.createSession(id, {
|
|
1515
|
+
ttsConfig,
|
|
1516
|
+
audioParams,
|
|
1517
|
+
autoPlay,
|
|
1518
|
+
metricsCollector,
|
|
1519
|
+
visualization,
|
|
1520
|
+
onPlayStart,
|
|
1521
|
+
onPlayPause,
|
|
1522
|
+
onPlayResume,
|
|
1523
|
+
onPlayEnd,
|
|
1619
1524
|
onError: (err) => {
|
|
1620
|
-
|
|
1621
|
-
setErrorState(err.msg || "TTS error");
|
|
1622
|
-
onError?.(new Error(err.msg || "TTS error"));
|
|
1623
|
-
setIsSynthesizing(false);
|
|
1525
|
+
handleError(text, audioParams?.speaker || "");
|
|
1624
1526
|
}
|
|
1625
1527
|
});
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1528
|
+
setInternalStreamId(id);
|
|
1529
|
+
await session.play(text);
|
|
1530
|
+
},
|
|
1531
|
+
[
|
|
1532
|
+
isSubscriptionMode,
|
|
1533
|
+
externalStreamId,
|
|
1534
|
+
stop,
|
|
1535
|
+
ttsConfig,
|
|
1536
|
+
audioParams,
|
|
1537
|
+
autoPlay,
|
|
1538
|
+
metricsCollector,
|
|
1539
|
+
visualization,
|
|
1540
|
+
onPlayStart,
|
|
1541
|
+
onPlayPause,
|
|
1542
|
+
onPlayResume,
|
|
1543
|
+
onPlayEnd,
|
|
1544
|
+
handleError
|
|
1545
|
+
]
|
|
1546
|
+
);
|
|
1547
|
+
const seek = (0, import_react3.useCallback)(
|
|
1548
|
+
(percentage) => {
|
|
1549
|
+
if (streamId) {
|
|
1550
|
+
StreamPlaybackManager.getSession(streamId)?.seek(percentage);
|
|
1634
1551
|
}
|
|
1635
|
-
}
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1552
|
+
},
|
|
1553
|
+
[streamId]
|
|
1554
|
+
);
|
|
1555
|
+
const getFrequencyData = (0, import_react3.useCallback)(
|
|
1556
|
+
() => state.visualizationData.frequencyData,
|
|
1557
|
+
[state.visualizationData]
|
|
1558
|
+
);
|
|
1559
|
+
const getTimeDomainData = (0, import_react3.useCallback)(
|
|
1560
|
+
() => state.visualizationData.timeDomainData,
|
|
1561
|
+
[state.visualizationData]
|
|
1562
|
+
);
|
|
1563
|
+
const isStreamActive = !!(externalStreamId && (state.isPlaying || state.isPaused || state.isSynthesizing));
|
|
1564
|
+
const canResume = (0, import_react3.useCallback)(() => {
|
|
1565
|
+
if (!streamId) return false;
|
|
1566
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1567
|
+
return !!session;
|
|
1568
|
+
}, [streamId]);
|
|
1569
|
+
return {
|
|
1570
|
+
isPlaying: state.isPlaying,
|
|
1571
|
+
isPaused: state.isPaused,
|
|
1572
|
+
isSynthesizing: state.isSynthesizing,
|
|
1573
|
+
progress: state.progress,
|
|
1574
|
+
error,
|
|
1575
|
+
play,
|
|
1576
|
+
pause,
|
|
1577
|
+
resume,
|
|
1578
|
+
stop,
|
|
1579
|
+
togglePlay,
|
|
1580
|
+
seek,
|
|
1581
|
+
getFrequencyData,
|
|
1582
|
+
getTimeDomainData,
|
|
1583
|
+
visualizationData: state.visualizationData,
|
|
1584
|
+
isStreamActive,
|
|
1585
|
+
streamState: state,
|
|
1586
|
+
canResume
|
|
1587
|
+
};
|
|
1588
|
+
}
|
|
1589
|
+
|
|
1590
|
+
// src/tts/useStreamTTS.ts
|
|
1591
|
+
var import_react4 = require("react");
|
|
1592
|
+
function useStreamTTS({
|
|
1593
|
+
ttsConfig,
|
|
1594
|
+
audioParams,
|
|
1595
|
+
autoPlay = true,
|
|
1596
|
+
metricsCollector = new NoopMetricsCollector(),
|
|
1597
|
+
onPlayStart,
|
|
1598
|
+
onPlayPause,
|
|
1599
|
+
onPlayResume,
|
|
1600
|
+
onPlayEnd,
|
|
1601
|
+
onError,
|
|
1602
|
+
visualization,
|
|
1603
|
+
maxSegmentLength = 150
|
|
1604
|
+
}) {
|
|
1605
|
+
const [streamId, setStreamId] = (0, import_react4.useState)("");
|
|
1606
|
+
const streamIdRef = (0, import_react4.useRef)("");
|
|
1607
|
+
const [state, setState] = (0, import_react4.useState)({
|
|
1608
|
+
isPlaying: false,
|
|
1609
|
+
isPaused: false,
|
|
1610
|
+
isSynthesizing: false,
|
|
1611
|
+
progress: 0,
|
|
1612
|
+
visualizationData: {
|
|
1613
|
+
frequencyData: new Uint8Array(0),
|
|
1614
|
+
timeDomainData: new Uint8Array(0)
|
|
1615
|
+
},
|
|
1616
|
+
error: null,
|
|
1617
|
+
isConnected: false,
|
|
1618
|
+
isSessionStarted: false,
|
|
1619
|
+
isStreamFinished: false
|
|
1620
|
+
});
|
|
1621
|
+
const [streamText, setStreamText] = (0, import_react4.useState)("");
|
|
1622
|
+
const streamTextRef = (0, import_react4.useRef)("");
|
|
1623
|
+
const connect = (0, import_react4.useCallback)(async () => {
|
|
1624
|
+
const newStreamId = `tts-stream-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1625
|
+
setStreamId(newStreamId);
|
|
1626
|
+
streamIdRef.current = newStreamId;
|
|
1627
|
+
streamTextRef.current = "";
|
|
1628
|
+
setStreamText("");
|
|
1629
|
+
const session = StreamPlaybackManager.createSession(newStreamId, {
|
|
1630
|
+
ttsConfig,
|
|
1631
|
+
audioParams,
|
|
1632
|
+
autoPlay,
|
|
1633
|
+
metricsCollector,
|
|
1634
|
+
visualization,
|
|
1635
|
+
maxSegmentLength,
|
|
1636
|
+
onPlayStart,
|
|
1637
|
+
onPlayPause,
|
|
1638
|
+
onPlayResume,
|
|
1639
|
+
onPlayEnd,
|
|
1640
|
+
onError: (err) => {
|
|
1641
|
+
setState((prev) => ({ ...prev, error: err.message }));
|
|
1642
|
+
onError?.(err);
|
|
1643
|
+
}
|
|
1644
|
+
});
|
|
1645
|
+
await session.connect();
|
|
1646
|
+
return newStreamId;
|
|
1640
1647
|
}, [
|
|
1641
1648
|
ttsConfig,
|
|
1642
1649
|
audioParams,
|
|
1643
1650
|
autoPlay,
|
|
1644
|
-
stop,
|
|
1645
|
-
instanceId,
|
|
1646
|
-
onPlayStart,
|
|
1647
|
-
onPlayEnd,
|
|
1648
|
-
initAudioContext,
|
|
1649
|
-
pause,
|
|
1650
1651
|
metricsCollector,
|
|
1652
|
+
visualization,
|
|
1651
1653
|
maxSegmentLength,
|
|
1652
|
-
|
|
1654
|
+
onPlayStart,
|
|
1655
|
+
onPlayPause,
|
|
1656
|
+
onPlayResume,
|
|
1657
|
+
onPlayEnd,
|
|
1653
1658
|
onError
|
|
1654
1659
|
]);
|
|
1660
|
+
(0, import_react4.useEffect)(() => {
|
|
1661
|
+
if (!streamId) return;
|
|
1662
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1663
|
+
if (!session) return;
|
|
1664
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1665
|
+
setState(newState);
|
|
1666
|
+
});
|
|
1667
|
+
return () => {
|
|
1668
|
+
unsubscribe();
|
|
1669
|
+
};
|
|
1670
|
+
}, [streamId]);
|
|
1655
1671
|
const onMessage = (0, import_react4.useCallback)((chunk) => {
|
|
1656
|
-
if (!
|
|
1672
|
+
if (!streamIdRef.current) return;
|
|
1657
1673
|
streamTextRef.current += chunk;
|
|
1658
1674
|
setStreamText(streamTextRef.current);
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
calledSessionStartedRef.current = true;
|
|
1662
|
-
clientRef.current.startSession();
|
|
1663
|
-
}
|
|
1664
|
-
splitterRef.current?.onChunk(chunk);
|
|
1675
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1676
|
+
session?.handleStreamChunk(chunk);
|
|
1665
1677
|
}, []);
|
|
1666
1678
|
const finishStream = (0, import_react4.useCallback)(async () => {
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
if (segmentQueueRef.current.length > 0 || isSendingRef.current) {
|
|
1671
|
-
await new Promise((resolve) => {
|
|
1672
|
-
resolveAllSegmentsSentRef.current = resolve;
|
|
1673
|
-
});
|
|
1674
|
-
} else if (clientRef.current && isSessionStartedRef.current && !isSessionFinishedRef.current) {
|
|
1675
|
-
isSessionFinishedRef.current = true;
|
|
1676
|
-
clientRef.current.finishSession();
|
|
1677
|
-
}
|
|
1679
|
+
if (!streamIdRef.current) return;
|
|
1680
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1681
|
+
await session?.finishStream();
|
|
1678
1682
|
}, []);
|
|
1679
|
-
const
|
|
1680
|
-
if (
|
|
1681
|
-
|
|
1682
|
-
if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
|
|
1683
|
-
duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
|
|
1684
|
-
}
|
|
1685
|
-
if (isFinite(duration) && duration > 0) {
|
|
1686
|
-
const time = percentage / 100 * duration;
|
|
1687
|
-
if (isFinite(time)) {
|
|
1688
|
-
audioRef.current.currentTime = time;
|
|
1689
|
-
setProgress(percentage);
|
|
1690
|
-
}
|
|
1691
|
-
}
|
|
1683
|
+
const pause = (0, import_react4.useCallback)(() => {
|
|
1684
|
+
if (streamIdRef.current) {
|
|
1685
|
+
StreamPlaybackManager.pause(streamIdRef.current);
|
|
1692
1686
|
}
|
|
1693
1687
|
}, []);
|
|
1694
|
-
const
|
|
1695
|
-
if (
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
return dataArray;
|
|
1688
|
+
const resume = (0, import_react4.useCallback)(() => {
|
|
1689
|
+
if (streamIdRef.current) {
|
|
1690
|
+
StreamPlaybackManager.resume(streamIdRef.current);
|
|
1691
|
+
}
|
|
1699
1692
|
}, []);
|
|
1700
|
-
const
|
|
1701
|
-
if (
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1693
|
+
const stop = (0, import_react4.useCallback)(() => {
|
|
1694
|
+
if (streamIdRef.current) {
|
|
1695
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1696
|
+
setStreamId("");
|
|
1697
|
+
streamIdRef.current = "";
|
|
1698
|
+
}
|
|
1705
1699
|
}, []);
|
|
1706
|
-
(0, import_react4.
|
|
1707
|
-
if (
|
|
1708
|
-
|
|
1709
|
-
let lastUpdate = 0;
|
|
1710
|
-
const interval = visualization.refreshInterval || 0;
|
|
1711
|
-
const update = (timestamp) => {
|
|
1712
|
-
if (isPlaying && !isPaused) {
|
|
1713
|
-
if (timestamp - lastUpdate >= interval) {
|
|
1714
|
-
setVisualizationData({
|
|
1715
|
-
frequencyData: getFrequencyData(),
|
|
1716
|
-
timeDomainData: getTimeDomainData()
|
|
1717
|
-
});
|
|
1718
|
-
lastUpdate = timestamp;
|
|
1719
|
-
}
|
|
1720
|
-
animId = requestAnimationFrame(update);
|
|
1721
|
-
}
|
|
1722
|
-
};
|
|
1723
|
-
if (isPlaying && !isPaused) {
|
|
1724
|
-
animId = requestAnimationFrame(update);
|
|
1700
|
+
const seek = (0, import_react4.useCallback)((percentage) => {
|
|
1701
|
+
if (streamIdRef.current) {
|
|
1702
|
+
StreamPlaybackManager.getSession(streamIdRef.current)?.seek(percentage);
|
|
1725
1703
|
}
|
|
1726
|
-
|
|
1727
|
-
if (animId) cancelAnimationFrame(animId);
|
|
1728
|
-
};
|
|
1729
|
-
}, [isPlaying, isPaused, visualization, getFrequencyData, getTimeDomainData]);
|
|
1704
|
+
}, []);
|
|
1730
1705
|
(0, import_react4.useEffect)(() => {
|
|
1731
1706
|
return () => {
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
audioContextRef.current.close();
|
|
1707
|
+
if (streamIdRef.current) {
|
|
1708
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1735
1709
|
}
|
|
1736
1710
|
};
|
|
1737
|
-
}, [
|
|
1711
|
+
}, []);
|
|
1712
|
+
const getFrequencyData = (0, import_react4.useCallback)(
|
|
1713
|
+
() => state.visualizationData.frequencyData,
|
|
1714
|
+
[state.visualizationData]
|
|
1715
|
+
);
|
|
1716
|
+
const getTimeDomainData = (0, import_react4.useCallback)(
|
|
1717
|
+
() => state.visualizationData.timeDomainData,
|
|
1718
|
+
[state.visualizationData]
|
|
1719
|
+
);
|
|
1738
1720
|
return {
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1721
|
+
streamId,
|
|
1722
|
+
isConnected: state.isConnected,
|
|
1723
|
+
isSessionStarted: state.isSessionStarted,
|
|
1724
|
+
isSynthesizing: state.isSynthesizing,
|
|
1725
|
+
isPlaying: state.isPlaying,
|
|
1726
|
+
isPaused: state.isPaused,
|
|
1727
|
+
error: state.error,
|
|
1745
1728
|
streamText,
|
|
1746
|
-
progress,
|
|
1729
|
+
progress: state.progress,
|
|
1747
1730
|
connect,
|
|
1748
1731
|
onMessage,
|
|
1749
1732
|
finishStream,
|
|
@@ -1753,23 +1736,9 @@ function useStreamTTS({
|
|
|
1753
1736
|
seek,
|
|
1754
1737
|
getFrequencyData,
|
|
1755
1738
|
getTimeDomainData,
|
|
1756
|
-
visualizationData
|
|
1739
|
+
visualizationData: state.visualizationData
|
|
1757
1740
|
};
|
|
1758
1741
|
}
|
|
1759
|
-
function getSessionAudioCache(instanceId) {
|
|
1760
|
-
return sessionAudioCache.get(instanceId);
|
|
1761
|
-
}
|
|
1762
|
-
function clearSessionAudioCache(instanceId) {
|
|
1763
|
-
sessionAudioCache.delete(instanceId);
|
|
1764
|
-
}
|
|
1765
|
-
function findSessionCacheByText(streamText, voice, speed) {
|
|
1766
|
-
for (const entry of sessionAudioCache.values()) {
|
|
1767
|
-
if (entry.streamText === streamText && entry.voice === voice && entry.speed === speed) {
|
|
1768
|
-
return entry;
|
|
1769
|
-
}
|
|
1770
|
-
}
|
|
1771
|
-
return void 0;
|
|
1772
|
-
}
|
|
1773
1742
|
|
|
1774
1743
|
// src/components/AudioWaveVisualizer.tsx
|
|
1775
1744
|
var import_react5 = require("react");
|
|
@@ -2145,10 +2114,8 @@ var AudioProgressBar_default = AudioProgressBar;
|
|
|
2145
2114
|
0 && (module.exports = {
|
|
2146
2115
|
AudioProgressBar,
|
|
2147
2116
|
AudioWaveVisualizer,
|
|
2117
|
+
StreamPlaybackManager,
|
|
2148
2118
|
StreamingTextSplitter,
|
|
2149
|
-
clearSessionAudioCache,
|
|
2150
|
-
findSessionCacheByText,
|
|
2151
|
-
getSessionAudioCache,
|
|
2152
2119
|
splitTextByDelimiters,
|
|
2153
2120
|
useMessageTTS,
|
|
2154
2121
|
useStreamTTS,
|