@wq-hook/volcano-react 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +129 -40
- package/dist/index.d.ts +129 -40
- package/dist/index.js +1080 -1113
- package/dist/index.mjs +1079 -1110
- package/package.json +2 -2
package/dist/index.mjs
CHANGED
|
@@ -396,10 +396,253 @@ function useVolcanoTTS({
|
|
|
396
396
|
}
|
|
397
397
|
|
|
398
398
|
// src/tts/useMessageTTS.ts
|
|
399
|
+
import { useCallback as useCallback3, useEffect as useEffect2, useRef as useRef3, useState as useState3 } from "react";
|
|
400
|
+
|
|
401
|
+
// src/tts/StreamPlaybackManager.ts
|
|
399
402
|
import { WebsocketMSE as WebsocketMSE2 } from "@wq-hook/volcano-sdk/tts";
|
|
403
|
+
|
|
404
|
+
// src/tts/StreamingTextSplitter.ts
|
|
400
405
|
import { MarkdownFormatter as MarkdownFormatter2 } from "@wq-hook/volcano-sdk";
|
|
401
|
-
import { useCallback as useCallback3, useEffect as useEffect2, useRef as useRef3, useState as useState3 } from "react";
|
|
402
406
|
import emojiRegex2 from "emoji-regex";
|
|
407
|
+
/**
 * Incrementally splits streamed text into segments.
 *
 * Text is buffered as chunks arrive. A segment is emitted whenever a complete
 * line (terminated by "\n") is available, or when the buffer grows to
 * `maxLength` and has to be split. Each emitted segment has the shape
 * `{ index, content, length, sent: false }`.
 */
var StreamingTextSplitter = class {
  /**
   * @param {object} [options]
   * @param {number} [options.maxLength=150] - Buffer/segment length (UTF-16 units) at which a split is forced.
   * @param {number} [options.minLength=10] - A sentence-end split is only used when it lies beyond this index.
   * @param {(segment: object) => void} [options.onSegmentComplete] - Called once per emitted segment.
   * @param {(segments: object[]) => void} [options.onAllComplete] - Called by complete() with every emitted segment.
   */
  constructor(options = {}) {
    /** Text received so far that has not yet been emitted as a segment. */
    this.buffer = "";
    /** Index that will be assigned to the next emitted segment. */
    this.segmentIndex = 0;
    /** Every segment emitted since construction or the last reset(). */
    this.segments = [];
    /** Set by complete(); later chunks are ignored until reset(). */
    this.isCompleted = false;
    this.maxLength = options.maxLength || 150;
    this.minLength = options.minLength || 10;
    this.onSegmentComplete = options.onSegmentComplete;
    this.onAllComplete = options.onAllComplete;
  }

  /**
   * Receives one chunk of streamed text.
   *
   * Every complete line currently in the buffer is emitted, including lines
   * that follow a leading newline in the same chunk.
   *
   * @param {string} chunk - Text chunk; empty chunks and chunks received after complete() are ignored.
   */
  onChunk(chunk) {
    if (!chunk || this.isCompleted) return;
    this.buffer += chunk;
    if (this.detectBoundary(chunk)) {
      this.drainCompleteLines();
    }
  }

  /**
   * Decides whether the buffer should be examined for segments.
   *
   * Side effect: when the buffer has reached `maxLength`, it is force-split
   * before this method returns.
   *
   * @param {string} chunk - The most recently received chunk.
   * @returns {boolean} True when the chunk contains a newline or the buffer reached `maxLength`.
   */
  detectBoundary(chunk) {
    const isOverLength = this.buffer.length >= this.maxLength;
    if (isOverLength) {
      this.forceSplitAtSentenceBoundary();
    }
    return isOverLength || chunk.includes("\n");
  }

  /**
   * Splits an over-long buffer in two, emits the first part and keeps the rest buffered.
   *
   * The split point is just after the last sentence terminator when that
   * terminator lies beyond `minLength`; otherwise the buffer is cut in half.
   */
  forceSplitAtSentenceBoundary() {
    const content = this.buffer;
    // Ideographic full stop plus ASCII and full-width question/exclamation marks.
    const sentenceEnders = "\u3002?!\uFF1F\uFF01";
    let lastEnderIndex = -1;
    for (let i = content.length - 1; i >= 0; i--) {
      if (sentenceEnders.includes(content[i])) {
        lastEnderIndex = i;
        break;
      }
    }

    let splitPoint;
    if (lastEnderIndex > this.minLength) {
      splitPoint = lastEnderIndex + 1;
    } else {
      splitPoint = Math.floor(content.length / 2);
      // Never cut between the two halves of a surrogate pair: that would
      // leave a lone surrogate at the end of one part and the start of the other.
      const before = content.charCodeAt(splitPoint - 1);
      const after = content.charCodeAt(splitPoint);
      const splitsSurrogatePair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsSurrogatePair) {
        splitPoint += 1;
      }
    }

    this.buffer = content.substring(0, splitPoint);
    this.flushSegment();
    this.buffer = content.substring(splitPoint);
  }

  /**
   * Emits the given text as one or more segments without touching the buffer.
   *
   * @param {string} bufferToFlush - Text to turn into segments.
   */
  flushSegmentWithBuffer(bufferToFlush) {
    this.emitSegments(bufferToFlush);
  }

  /**
   * Emits the whole current buffer as one or more segments, then clears the buffer.
   */
  flushSegment() {
    this.emitSegments(this.buffer);
    this.buffer = "";
  }

  /**
   * Removes every complete ("\n"-terminated) line from the buffer and emits it.
   * Empty lines are dropped; text after the last newline stays buffered.
   */
  drainCompleteLines() {
    let newlineIndex = this.buffer.indexOf("\n");
    while (newlineIndex !== -1) {
      const line = this.buffer.substring(0, newlineIndex);
      // Update the buffer before emitting so callbacks observe the remaining text.
      this.buffer = this.buffer.substring(newlineIndex + 1);
      this.emitSegments(line);
      newlineIndex = this.buffer.indexOf("\n");
    }
  }

  /**
   * Shared emission path for both flush methods.
   *
   * Trims the text, skips it when nothing remains or when it is fewer than
   * three characters with no letter or digit, formats it with
   * MarkdownFormatter2, strips emoji, splits it if it is longer than
   * `maxLength`, then records each piece and notifies `onSegmentComplete`.
   *
   * @param {string} rawContent - Text to emit.
   */
  emitSegments(rawContent) {
    // Trimming also removes the "\r" left behind by "\r\n" line endings.
    const content = (rawContent || "").trim();
    if (!content) return;

    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
    const isTooShort = content.length < 3;
    if (isPureSymbols && isTooShort) return;

    const formattedContent = MarkdownFormatter2.format(content).replace(emojiRegex2(), "");
    // Text consisting only of emoji can collapse to whitespace; nothing to emit then.
    if (!formattedContent || !formattedContent.trim()) return;

    const subSegments =
      formattedContent.length > this.maxLength
        ? this.splitLongSegment(formattedContent)
        : [formattedContent];

    for (const subSegment of subSegments) {
      if (!subSegment) continue;
      const segment = {
        index: this.segmentIndex++,
        content: subSegment,
        length: subSegment.length,
        sent: false
      };
      this.segments.push(segment);
      this.onSegmentComplete?.(segment);
    }
  }

  /**
   * Splits an over-long segment into smaller pieces.
   *
   * A piece ends after every clause/sentence punctuation mark, or as soon as
   * it reaches `maxLength`, whichever comes first.
   *
   * @param {string} segment - The over-long text.
   * @returns {string[]} The non-empty pieces, in order.
   */
  splitLongSegment(segment) {
    // Built once per call; no "g" flag, so test() keeps no state between characters.
    const clauseEnders = /[\u3002?!,\uFF1F\uFF01\uFF0C]/;
    const result = [];
    let current = "";
    for (const char of segment) {
      current += char;
      if (clauseEnders.test(char) || current.length >= this.maxLength) {
        result.push(current);
        current = "";
      }
    }
    if (current) {
      result.push(current);
    }
    return result;
  }

  /**
   * Marks the stream as finished, emits whatever is still buffered and
   * reports all segments through `onAllComplete`. Calling it again is a no-op.
   */
  complete() {
    if (this.isCompleted) return;
    this.isCompleted = true;
    this.drainCompleteLines();
    this.flushSegment();
    this.onAllComplete?.(this.segments);
  }

  /**
   * Restores the splitter to its initial, empty state.
   */
  reset() {
    this.buffer = "";
    this.segmentIndex = 0;
    this.segments = [];
    this.isCompleted = false;
  }

  /**
   * @returns {string} The text that is buffered but not yet emitted.
   */
  getBuffer() {
    return this.buffer;
  }

  /**
   * @returns {object[]} The internal list of emitted segments (not a copy).
   */
  getSegments() {
    return this.segments;
  }

  /**
   * @returns {{bufferLength: number, segmentCount: number, totalChars: number}}
   *   Current buffer length, number of emitted segments and the sum of their lengths.
   */
  getStats() {
    return {
      bufferLength: this.buffer.length,
      segmentCount: this.segments.length,
      totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
    };
  }
};
|
|
642
|
+
|
|
643
|
+
// src/tts/StreamPlaybackManager.ts
|
|
644
|
+
import emojiRegex3 from "emoji-regex";
|
|
645
|
+
import { MarkdownFormatter as MarkdownFormatter3 } from "@wq-hook/volcano-sdk";
|
|
403
646
|
|
|
404
647
|
// src/tts/TextSplitter.ts
|
|
405
648
|
function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
@@ -469,338 +712,178 @@ function splitTextByDelimiters(text, minLength = 10, maxLength = 150) {
|
|
|
469
712
|
return segments;
|
|
470
713
|
}
|
|
471
714
|
|
|
472
|
-
// src/tts/
|
|
473
|
-
var NoopMetricsCollector = class {
|
|
474
|
-
record(_metric) {
|
|
475
|
-
}
|
|
476
|
-
};
|
|
477
|
-
|
|
478
|
-
// src/tts/useMessageTTS.ts
|
|
715
|
+
// src/tts/StreamPlaybackManager.ts
|
|
479
716
|
var WS_URL = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
|
|
480
|
-
var activeInstances = /* @__PURE__ */ new Map();
|
|
481
717
|
/**
 * Appends `params` to `url` as a percent-encoded query string.
 *
 * Keys and values are both passed through encodeURIComponent. Entries whose
 * value is null or undefined are omitted instead of being sent as the
 * literal text "null"/"undefined". When `url` already has a query string the
 * new pairs are appended with "&"; when there is nothing to append, `url` is
 * returned unchanged.
 *
 * @param {string} url - Base URL, with or without an existing query string.
 * @param {Object<string, *>} params - Own enumerable properties become query parameters.
 * @returns {string} The URL including the encoded parameters.
 */
function buildFullUrl2(url, params) {
  const pairs = [];
  for (const [key, value] of Object.entries(params ?? {})) {
    if (value == null) continue;
    pairs.push(`${encodeURIComponent(key)}=${encodeURIComponent(value)}`);
  }
  if (pairs.length === 0) {
    return url;
  }

  let separator = "?";
  if (url.includes("?")) {
    // Do not double up when the URL already ends with a query delimiter.
    separator = url.endsWith("?") || url.endsWith("&") ? "" : "&";
  }
  return `${url}${separator}${pairs.join("&")}`;
}
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
const initAudioContext = useCallback3(() => {
|
|
540
|
-
if (!audioRef.current) return;
|
|
541
|
-
if (!audioContextRef.current) {
|
|
728
|
+
var PlaybackSession = class {
|
|
729
|
+
constructor(id, config) {
|
|
730
|
+
this.listeners = /* @__PURE__ */ new Set();
|
|
731
|
+
this.audioContext = null;
|
|
732
|
+
this.analyser = null;
|
|
733
|
+
this.source = null;
|
|
734
|
+
this.audioUrl = null;
|
|
735
|
+
// TTS Resources
|
|
736
|
+
this.client = null;
|
|
737
|
+
this.splitter = null;
|
|
738
|
+
// Internal State
|
|
739
|
+
this.segmentQueue = [];
|
|
740
|
+
this.isSending = false;
|
|
741
|
+
this.isSessionStarting = false;
|
|
742
|
+
this.streamText = "";
|
|
743
|
+
this.sessionAudioBuffers = [];
|
|
744
|
+
this.isStreamFinished = false;
|
|
745
|
+
this.isSessionFinished = false;
|
|
746
|
+
this.resolveAllSegmentsSent = null;
|
|
747
|
+
this.animId = null;
|
|
748
|
+
this.lastVisUpdate = 0;
|
|
749
|
+
this.id = id;
|
|
750
|
+
this.config = config;
|
|
751
|
+
this.state = {
|
|
752
|
+
isPlaying: false,
|
|
753
|
+
isPaused: false,
|
|
754
|
+
isSynthesizing: false,
|
|
755
|
+
progress: 0,
|
|
756
|
+
visualizationData: {
|
|
757
|
+
frequencyData: new Uint8Array(0),
|
|
758
|
+
timeDomainData: new Uint8Array(0)
|
|
759
|
+
},
|
|
760
|
+
error: null,
|
|
761
|
+
isConnected: false,
|
|
762
|
+
isSessionStarted: false,
|
|
763
|
+
isStreamFinished: false
|
|
764
|
+
};
|
|
765
|
+
this.audio = new Audio();
|
|
766
|
+
this.audio.crossOrigin = "anonymous";
|
|
767
|
+
this.setupAudioListeners();
|
|
768
|
+
}
|
|
769
|
+
/**
|
|
770
|
+
* 初始化 AudioContext(用于可视化)
|
|
771
|
+
*/
|
|
772
|
+
initAudioContext() {
|
|
773
|
+
if (!this.audioContext) {
|
|
542
774
|
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
543
|
-
|
|
775
|
+
this.audioContext = new AudioContextClass();
|
|
544
776
|
}
|
|
545
|
-
if (
|
|
546
|
-
|
|
777
|
+
if (this.audioContext.state === "suspended") {
|
|
778
|
+
this.audioContext.resume();
|
|
547
779
|
}
|
|
548
|
-
if (!
|
|
549
|
-
|
|
550
|
-
|
|
780
|
+
if (!this.analyser && this.audioContext) {
|
|
781
|
+
this.analyser = this.audioContext.createAnalyser();
|
|
782
|
+
this.analyser.fftSize = this.config.visualization?.fftSize || 256;
|
|
551
783
|
}
|
|
552
|
-
if (!
|
|
784
|
+
if (!this.source && this.audioContext && this.analyser) {
|
|
553
785
|
try {
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
);
|
|
557
|
-
sourceRef.current.connect(analyserRef.current);
|
|
558
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
786
|
+
this.source = this.audioContext.createMediaElementSource(this.audio);
|
|
787
|
+
this.source.connect(this.analyser);
|
|
788
|
+
this.analyser.connect(this.audioContext.destination);
|
|
559
789
|
} catch (e) {
|
|
560
790
|
}
|
|
561
791
|
}
|
|
562
|
-
}
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
792
|
+
}
|
|
793
|
+
setupAudioListeners() {
|
|
794
|
+
this.audio.onplay = () => {
|
|
795
|
+
this.updateState({ isPlaying: true, isPaused: false });
|
|
796
|
+
this.config.onPlayStart?.();
|
|
797
|
+
this.initAudioContext();
|
|
798
|
+
this.startVisualizationLoop();
|
|
799
|
+
};
|
|
800
|
+
this.audio.onpause = () => {
|
|
801
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
802
|
+
this.config.onPlayPause?.();
|
|
803
|
+
};
|
|
804
|
+
this.audio.onended = () => {
|
|
805
|
+
this.updateState({
|
|
806
|
+
isPlaying: false,
|
|
807
|
+
isPaused: false,
|
|
808
|
+
isSynthesizing: false,
|
|
809
|
+
progress: 100
|
|
810
|
+
});
|
|
811
|
+
this.config.onPlayEnd?.();
|
|
812
|
+
this.stopVisualizationLoop();
|
|
813
|
+
};
|
|
814
|
+
this.audio.onerror = (e) => {
|
|
815
|
+
const msg = this.audio.error?.message || "Audio playback error";
|
|
816
|
+
console.error("[PlaybackSession] Audio error:", msg);
|
|
817
|
+
this.updateState({ error: msg });
|
|
818
|
+
this.config.onError?.(new Error(msg));
|
|
819
|
+
};
|
|
820
|
+
this.audio.ontimeupdate = () => {
|
|
821
|
+
let duration = this.audio.duration;
|
|
822
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
823
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
582
824
|
}
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
window.speechSynthesis.cancel();
|
|
587
|
-
fallbackUtteranceRef.current = null;
|
|
588
|
-
}
|
|
589
|
-
isFallbackRef.current = false;
|
|
590
|
-
}, []);
|
|
591
|
-
const stop = useCallback3(() => {
|
|
592
|
-
if (clientRef.current) {
|
|
593
|
-
clientRef.current.close();
|
|
594
|
-
clientRef.current = null;
|
|
595
|
-
}
|
|
596
|
-
cleanupAudio();
|
|
597
|
-
setIsPlaying(false);
|
|
598
|
-
setIsPaused(false);
|
|
599
|
-
setIsSynthesizing(false);
|
|
600
|
-
setProgress(0);
|
|
601
|
-
activeInstances.delete(instanceId);
|
|
602
|
-
}, [cleanupAudio, instanceId]);
|
|
603
|
-
const pause = useCallback3(() => {
|
|
604
|
-
if (isFallbackRef.current) {
|
|
605
|
-
window.speechSynthesis.pause();
|
|
606
|
-
} else if (audioRef.current) {
|
|
607
|
-
audioRef.current.pause();
|
|
608
|
-
}
|
|
609
|
-
setIsPaused(true);
|
|
610
|
-
setIsPlaying(false);
|
|
611
|
-
onPlayPause?.();
|
|
612
|
-
}, [onPlayPause]);
|
|
613
|
-
const resume = useCallback3(() => {
|
|
614
|
-
stopOthers();
|
|
615
|
-
if (isFallbackRef.current) {
|
|
616
|
-
window.speechSynthesis.resume();
|
|
617
|
-
} else if (audioRef.current) {
|
|
618
|
-
audioRef.current.play();
|
|
619
|
-
}
|
|
620
|
-
setIsPaused(false);
|
|
621
|
-
setIsPlaying(true);
|
|
622
|
-
onPlayResume?.();
|
|
623
|
-
activeInstances.set(instanceId, { pause });
|
|
624
|
-
}, [stopOthers, instanceId, pause, onPlayResume]);
|
|
625
|
-
const togglePlay = useCallback3(() => {
|
|
626
|
-
if (isPlaying) {
|
|
627
|
-
pause();
|
|
628
|
-
} else {
|
|
629
|
-
resume();
|
|
630
|
-
}
|
|
631
|
-
}, [isPlaying, pause, resume]);
|
|
632
|
-
const playFallback = useCallback3(
|
|
633
|
-
(text) => {
|
|
634
|
-
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
635
|
-
isFallbackRef.current = true;
|
|
636
|
-
if (clientRef.current) {
|
|
637
|
-
clientRef.current.close();
|
|
638
|
-
clientRef.current = null;
|
|
825
|
+
if (isFinite(duration) && duration > 0) {
|
|
826
|
+
const progress = this.audio.currentTime / duration * 100;
|
|
827
|
+
this.updateState({ progress });
|
|
639
828
|
}
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
829
|
+
};
|
|
830
|
+
}
|
|
831
|
+
/**
|
|
832
|
+
* 建立 WebSocket 连接
|
|
833
|
+
*/
|
|
834
|
+
async connect() {
|
|
835
|
+
if (this.state.isConnected) return;
|
|
836
|
+
this.updateState({
|
|
837
|
+
error: null,
|
|
838
|
+
progress: 0,
|
|
839
|
+
isSynthesizing: false,
|
|
840
|
+
isConnected: false,
|
|
841
|
+
isSessionStarted: false
|
|
842
|
+
});
|
|
843
|
+
this.streamText = "";
|
|
844
|
+
this.segmentQueue = [];
|
|
845
|
+
this.sessionAudioBuffers = [];
|
|
846
|
+
this.isStreamFinished = false;
|
|
847
|
+
this.isSessionFinished = false;
|
|
848
|
+
this.isSessionStarting = false;
|
|
849
|
+
if (this.client) {
|
|
850
|
+
this.client.close();
|
|
851
|
+
this.client = null;
|
|
852
|
+
}
|
|
853
|
+
this.splitter = new StreamingTextSplitter({
|
|
854
|
+
maxLength: this.config.maxSegmentLength || 150,
|
|
855
|
+
onSegmentComplete: (segment) => {
|
|
856
|
+
this.segmentQueue.push(segment);
|
|
857
|
+
if (this.state.isSessionStarted) {
|
|
858
|
+
this.processQueue();
|
|
859
|
+
}
|
|
643
860
|
}
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
};
|
|
662
|
-
utterance.onerror = (e) => {
|
|
663
|
-
console.error("[useMessageTTS] Fallback TTS failed", e);
|
|
664
|
-
setErrorState("Fallback TTS failed");
|
|
665
|
-
onError?.(new Error("Fallback TTS failed"));
|
|
666
|
-
setIsPlaying(false);
|
|
667
|
-
};
|
|
668
|
-
fallbackUtteranceRef.current = utterance;
|
|
669
|
-
window.speechSynthesis.speak(utterance);
|
|
670
|
-
},
|
|
671
|
-
[audioParams, instanceId, onError, onPlayEnd, onPlayStart, pause]
|
|
672
|
-
);
|
|
673
|
-
const executeTTS = useCallback3(
|
|
674
|
-
async (text, targetVoice) => {
|
|
675
|
-
stop();
|
|
676
|
-
stopOthers();
|
|
677
|
-
setErrorState(null);
|
|
678
|
-
setIsSynthesizing(true);
|
|
679
|
-
setProgress(0);
|
|
680
|
-
audioBuffersRef.current = [];
|
|
681
|
-
isFallbackRef.current = false;
|
|
682
|
-
const speed = audioParams?.speech_rate || 0;
|
|
683
|
-
const voice = targetVoice;
|
|
684
|
-
const cacheKey = TTSCache.generateKey(text, voice, speed);
|
|
685
|
-
cacheKeyRef.current = cacheKey;
|
|
686
|
-
const startTime = Date.now();
|
|
687
|
-
metricsCollector.record({
|
|
688
|
-
name: "tts_request",
|
|
689
|
-
labels: { voice, speed, text_length: text.length },
|
|
690
|
-
value: 1,
|
|
691
|
-
timestamp: startTime
|
|
692
|
-
});
|
|
861
|
+
});
|
|
862
|
+
this.client = WebsocketMSE2({ autoStartSession: false });
|
|
863
|
+
const { ttsConfig, audioParams } = this.config;
|
|
864
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
865
|
+
const startTime = Date.now();
|
|
866
|
+
this.config.metricsCollector?.record({
|
|
867
|
+
name: "tts_request",
|
|
868
|
+
labels: { voice, text_length: 0 },
|
|
869
|
+
value: 1,
|
|
870
|
+
timestamp: startTime
|
|
871
|
+
});
|
|
872
|
+
return new Promise((resolve, reject) => {
|
|
873
|
+
const timeoutId = setTimeout(() => {
|
|
874
|
+
const err = new Error("WebSocket connection timeout (10s)");
|
|
875
|
+
this.updateState({ error: err.message });
|
|
876
|
+
reject(err);
|
|
877
|
+
}, 1e4);
|
|
693
878
|
try {
|
|
694
|
-
const
|
|
695
|
-
const audio = new Audio();
|
|
696
|
-
audio.crossOrigin = "anonymous";
|
|
697
|
-
audioRef.current = audio;
|
|
698
|
-
audio.onplay = () => {
|
|
699
|
-
setIsPlaying(true);
|
|
700
|
-
setIsPaused(false);
|
|
701
|
-
onPlayStart?.();
|
|
702
|
-
initAudioContext();
|
|
703
|
-
activeInstances.set(instanceId, { pause });
|
|
704
|
-
metricsCollector.record({
|
|
705
|
-
name: "tts_latency",
|
|
706
|
-
labels: { stage: "playback", voice, speed },
|
|
707
|
-
value: Date.now() - startTime,
|
|
708
|
-
timestamp: Date.now()
|
|
709
|
-
});
|
|
710
|
-
};
|
|
711
|
-
audio.onpause = () => {
|
|
712
|
-
if (!audio.ended) {
|
|
713
|
-
}
|
|
714
|
-
};
|
|
715
|
-
audio.onended = () => {
|
|
716
|
-
setIsPlaying(false);
|
|
717
|
-
setIsPaused(false);
|
|
718
|
-
onPlayEnd?.();
|
|
719
|
-
activeInstances.delete(instanceId);
|
|
720
|
-
};
|
|
721
|
-
audio.onerror = (e) => {
|
|
722
|
-
console.error("Audio playback error:", e, audio.error);
|
|
723
|
-
metricsCollector.record({
|
|
724
|
-
name: "tts_error",
|
|
725
|
-
labels: {
|
|
726
|
-
error_code: "playback_error",
|
|
727
|
-
voice,
|
|
728
|
-
detail: audio.error?.message || String(audio.error?.code)
|
|
729
|
-
},
|
|
730
|
-
value: 1,
|
|
731
|
-
timestamp: Date.now()
|
|
732
|
-
});
|
|
733
|
-
handleError(text, voice);
|
|
734
|
-
};
|
|
735
|
-
audio.ontimeupdate = () => {
|
|
736
|
-
let duration = audio.duration;
|
|
737
|
-
if (!isFinite(duration)) {
|
|
738
|
-
if (audio.buffered.length > 0) {
|
|
739
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
740
|
-
}
|
|
741
|
-
}
|
|
742
|
-
if (isFinite(duration) && duration > 0) {
|
|
743
|
-
setProgress(audio.currentTime / duration * 100);
|
|
744
|
-
}
|
|
745
|
-
};
|
|
746
|
-
if (cachedData) {
|
|
747
|
-
const totalSize = cachedData.reduce(
|
|
748
|
-
(acc, buf) => acc + buf.byteLength,
|
|
749
|
-
0
|
|
750
|
-
);
|
|
751
|
-
metricsCollector.record({
|
|
752
|
-
name: "tts_cache_hit",
|
|
753
|
-
labels: { voice, speed },
|
|
754
|
-
value: 1,
|
|
755
|
-
timestamp: Date.now()
|
|
756
|
-
});
|
|
757
|
-
console.log(
|
|
758
|
-
JSON.stringify({
|
|
759
|
-
event: "tts_cache_hit",
|
|
760
|
-
cache_hit: true,
|
|
761
|
-
text_len: text.length,
|
|
762
|
-
voice,
|
|
763
|
-
speed,
|
|
764
|
-
data_size: totalSize
|
|
765
|
-
})
|
|
766
|
-
);
|
|
767
|
-
if (totalSize === 0) {
|
|
768
|
-
console.warn(
|
|
769
|
-
"[useMessageTTS] Cached data is empty, falling back to stream"
|
|
770
|
-
);
|
|
771
|
-
} else {
|
|
772
|
-
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
773
|
-
const url2 = URL.createObjectURL(blob);
|
|
774
|
-
audioUrlRef.current = url2;
|
|
775
|
-
audio.src = url2;
|
|
776
|
-
setIsSynthesizing(false);
|
|
777
|
-
if (autoPlay) {
|
|
778
|
-
try {
|
|
779
|
-
await audio.play();
|
|
780
|
-
} catch (err) {
|
|
781
|
-
console.warn("AutoPlay blocked", err);
|
|
782
|
-
}
|
|
783
|
-
}
|
|
784
|
-
return;
|
|
785
|
-
}
|
|
786
|
-
}
|
|
787
|
-
console.log("[useMessageTTS] Cache miss, starting stream");
|
|
788
|
-
clientRef.current = WebsocketMSE2({ autoStartSession: true });
|
|
789
|
-
const formattedText = MarkdownFormatter2.format(text).replace(
|
|
790
|
-
emojiRegex2(),
|
|
791
|
-
""
|
|
792
|
-
);
|
|
793
|
-
const segments = splitTextByDelimiters(formattedText);
|
|
794
|
-
const url = clientRef.current.start({
|
|
879
|
+
const url = this.client.start({
|
|
795
880
|
url: buildFullUrl2(WS_URL, {
|
|
796
881
|
api_access_key: `Jwt; ${ttsConfig.token}`,
|
|
797
882
|
api_app_key: ttsConfig.appid,
|
|
798
883
|
api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
|
|
799
884
|
}),
|
|
800
885
|
config: {
|
|
801
|
-
user: {
|
|
802
|
-
uid: `req-${Date.now()}`
|
|
803
|
-
},
|
|
886
|
+
user: { uid: `req-${Date.now()}` },
|
|
804
887
|
namespace: ttsConfig.namespace || "BidirectionalTTS",
|
|
805
888
|
req_params: {
|
|
806
889
|
speaker: voice,
|
|
@@ -815,456 +898,362 @@ function useMessageTTS({
|
|
|
815
898
|
enable_language_detector: true,
|
|
816
899
|
disable_markdown_filter: true,
|
|
817
900
|
enable_latex_tn: true
|
|
818
|
-
// max_length_to_filter_parenthesis: 100,
|
|
819
901
|
})
|
|
820
902
|
}
|
|
821
903
|
},
|
|
904
|
+
onStart: () => {
|
|
905
|
+
this.updateState({ isConnected: true });
|
|
906
|
+
},
|
|
907
|
+
onConnectionReady: () => {
|
|
908
|
+
clearTimeout(timeoutId);
|
|
909
|
+
resolve();
|
|
910
|
+
},
|
|
822
911
|
onSessionStarted: () => {
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
912
|
+
this.updateState({ isSessionStarted: true });
|
|
913
|
+
this.isSessionStarting = false;
|
|
914
|
+
if (this.segmentQueue.length > 0) {
|
|
915
|
+
this.processQueue();
|
|
916
|
+
}
|
|
827
917
|
},
|
|
828
918
|
onMessage: (data) => {
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
);
|
|
919
|
+
this.updateState({ isSynthesizing: true });
|
|
920
|
+
if (this.sessionAudioBuffers.length === 0) {
|
|
921
|
+
this.config.metricsCollector?.record({
|
|
922
|
+
name: "tts_latency",
|
|
923
|
+
labels: { stage: "first_packet", voice },
|
|
924
|
+
value: Date.now() - startTime,
|
|
925
|
+
timestamp: Date.now()
|
|
926
|
+
});
|
|
837
927
|
}
|
|
838
928
|
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
839
|
-
|
|
929
|
+
this.sessionAudioBuffers.push(buffer);
|
|
840
930
|
},
|
|
841
931
|
onSessionFinished: () => {
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
}
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
text_len: text.length,
|
|
851
|
-
duration_ms: Date.now() - startTime,
|
|
932
|
+
this.updateState({
|
|
933
|
+
isSynthesizing: false,
|
|
934
|
+
isSessionStarted: false
|
|
935
|
+
});
|
|
936
|
+
if (this.sessionAudioBuffers.length > 0 && this.streamText) {
|
|
937
|
+
const speed = audioParams?.speech_rate || 0;
|
|
938
|
+
const cacheKey = TTSCache.generateKey(
|
|
939
|
+
this.streamText,
|
|
852
940
|
voice,
|
|
853
941
|
speed
|
|
854
|
-
|
|
855
|
-
|
|
942
|
+
);
|
|
943
|
+
TTSCache.set(cacheKey, [...this.sessionAudioBuffers]);
|
|
944
|
+
}
|
|
945
|
+
this.config.metricsCollector?.record({
|
|
946
|
+
name: "tts_synthesis_finished",
|
|
947
|
+
labels: { voice, text_length: this.streamText.length },
|
|
948
|
+
value: Date.now() - startTime,
|
|
949
|
+
timestamp: Date.now()
|
|
950
|
+
});
|
|
856
951
|
},
|
|
857
952
|
onError: (err) => {
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
953
|
+
if (!this.state.isConnected) {
|
|
954
|
+
clearTimeout(timeoutId);
|
|
955
|
+
reject(new Error(err.msg || "TTS error"));
|
|
956
|
+
}
|
|
957
|
+
console.error("[PlaybackSession] TTS error:", err);
|
|
958
|
+
this.updateState({
|
|
959
|
+
error: err.msg || "TTS error",
|
|
960
|
+
isSynthesizing: false
|
|
864
961
|
});
|
|
865
|
-
|
|
866
|
-
|
|
962
|
+
this.config.onError?.(new Error(err.msg || "TTS error"));
|
|
963
|
+
},
|
|
964
|
+
onWSError: (err) => {
|
|
965
|
+
if (!this.state.isConnected) {
|
|
966
|
+
clearTimeout(timeoutId);
|
|
967
|
+
reject(err instanceof Error ? err : new Error("WebSocket error"));
|
|
968
|
+
}
|
|
867
969
|
}
|
|
868
970
|
});
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
971
|
+
if (this.audioUrl) {
|
|
972
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
973
|
+
}
|
|
974
|
+
this.audioUrl = url;
|
|
975
|
+
this.audio.src = url;
|
|
976
|
+
if (this.config.autoPlay !== false) {
|
|
977
|
+
this.audio.play().catch(
|
|
978
|
+
(e) => console.warn("[PlaybackSession] Autoplay blocked:", e)
|
|
979
|
+
);
|
|
877
980
|
}
|
|
878
981
|
} catch (err) {
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
timestamp: Date.now()
|
|
885
|
-
});
|
|
886
|
-
handleError(text, voice);
|
|
887
|
-
}
|
|
888
|
-
},
|
|
889
|
-
[
|
|
890
|
-
ttsConfig,
|
|
891
|
-
audioParams,
|
|
892
|
-
autoPlay,
|
|
893
|
-
stop,
|
|
894
|
-
stopOthers,
|
|
895
|
-
instanceId,
|
|
896
|
-
onPlayStart,
|
|
897
|
-
onPlayEnd,
|
|
898
|
-
initAudioContext,
|
|
899
|
-
pause,
|
|
900
|
-
fallbackVoice,
|
|
901
|
-
metricsCollector
|
|
902
|
-
]
|
|
903
|
-
);
|
|
904
|
-
const handleError = useCallback3(
|
|
905
|
-
(text, failedVoice) => {
|
|
906
|
-
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
907
|
-
console.warn(
|
|
908
|
-
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
982
|
+
clearTimeout(timeoutId);
|
|
983
|
+
console.error("[PlaybackSession] Connect error:", err);
|
|
984
|
+
this.updateState({ error: String(err) });
|
|
985
|
+
this.config.onError?.(
|
|
986
|
+
err instanceof Error ? err : new Error(String(err))
|
|
909
987
|
);
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
988
|
+
reject(err);
|
|
989
|
+
}
|
|
990
|
+
});
|
|
991
|
+
}
|
|
992
|
+
/**
|
|
993
|
+
* 发送流式文本
|
|
994
|
+
*/
|
|
995
|
+
handleStreamChunk(chunk) {
|
|
996
|
+
if (!chunk) return;
|
|
997
|
+
this.streamText += chunk;
|
|
998
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting && this.client && this.state.isConnected && !this.isSessionFinished) {
|
|
999
|
+
this.isSessionStarting = true;
|
|
1000
|
+
this.client.startSession();
|
|
1001
|
+
}
|
|
1002
|
+
this.splitter?.onChunk(chunk);
|
|
1003
|
+
if (this.state.isSessionStarted) {
|
|
1004
|
+
this.processQueue();
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
/**
|
|
1008
|
+
* 结束流式输入
|
|
1009
|
+
*/
|
|
1010
|
+
async finishStream() {
|
|
1011
|
+
this.isStreamFinished = true;
|
|
1012
|
+
this.updateState({ isStreamFinished: true });
|
|
1013
|
+
this.splitter?.complete();
|
|
1014
|
+
if (this.state.isSessionStarted) {
|
|
1015
|
+
this.processQueue();
|
|
1016
|
+
}
|
|
1017
|
+
if (this.segmentQueue.length > 0 || this.isSending) {
|
|
1018
|
+
await new Promise((resolve) => {
|
|
1019
|
+
this.resolveAllSegmentsSent = resolve;
|
|
1020
|
+
});
|
|
1021
|
+
} else if (this.client && this.state.isSessionStarted && !this.isSessionFinished) {
|
|
1022
|
+
this.isSessionFinished = true;
|
|
1023
|
+
this.client.finishSession();
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
1026
|
+
/**
|
|
1027
|
+
* 处理非流式播放(直接播放整段文本)
|
|
1028
|
+
*/
|
|
1029
|
+
async play(text) {
|
|
1030
|
+
const formattedText = MarkdownFormatter3.format(text).replace(
|
|
1031
|
+
emojiRegex3(),
|
|
1032
|
+
""
|
|
1033
|
+
);
|
|
1034
|
+
const { audioParams } = this.config;
|
|
1035
|
+
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
1036
|
+
const speed = audioParams?.speech_rate || 0;
|
|
1037
|
+
const cacheKey = TTSCache.generateKey(formattedText, voice, speed);
|
|
1038
|
+
const cachedData = await TTSCache.get(cacheKey);
|
|
1039
|
+
if (cachedData && cachedData.length > 0) {
|
|
1040
|
+
const blob = new Blob(cachedData, { type: "audio/mpeg" });
|
|
1041
|
+
const url = URL.createObjectURL(blob);
|
|
1042
|
+
if (this.audioUrl) URL.revokeObjectURL(this.audioUrl);
|
|
1043
|
+
this.audioUrl = url;
|
|
1044
|
+
this.audio.src = url;
|
|
1045
|
+
this.updateState({ isSynthesizing: false });
|
|
1046
|
+
if (this.config.autoPlay !== false) {
|
|
1047
|
+
try {
|
|
1048
|
+
await this.audio.play();
|
|
1049
|
+
} catch (e) {
|
|
1050
|
+
console.warn("Autoplay blocked", e);
|
|
917
1051
|
}
|
|
918
|
-
executeTTS(text, fallbackVoice);
|
|
919
|
-
} else {
|
|
920
|
-
playFallback(text);
|
|
921
1052
|
}
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
(
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
1053
|
+
return;
|
|
1054
|
+
}
|
|
1055
|
+
await this.connect();
|
|
1056
|
+
this.streamText = formattedText;
|
|
1057
|
+
const segments = splitTextByDelimiters(formattedText);
|
|
1058
|
+
if (this.state.isConnected) {
|
|
1059
|
+
if (!this.state.isSessionStarted && !this.isSessionStarting) {
|
|
1060
|
+
this.isSessionStarting = true;
|
|
1061
|
+
this.client?.startSession();
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
segments.forEach((seg, idx) => {
|
|
1065
|
+
this.segmentQueue.push({
|
|
1066
|
+
index: idx,
|
|
1067
|
+
content: seg.content,
|
|
1068
|
+
length: seg.content.length,
|
|
1069
|
+
sent: false
|
|
1070
|
+
});
|
|
1071
|
+
});
|
|
1072
|
+
if (this.state.isSessionStarted) {
|
|
1073
|
+
this.processQueue();
|
|
1074
|
+
}
|
|
1075
|
+
await this.finishStream();
|
|
1076
|
+
}
|
|
1077
|
+
processQueue() {
|
|
1078
|
+
if (!this.client || !this.state.isSessionStarted || this.isSending || this.isSessionFinished) {
|
|
1079
|
+
return;
|
|
1080
|
+
}
|
|
1081
|
+
if (this.segmentQueue.length === 0) {
|
|
1082
|
+
if (this.isStreamFinished && !this.isSessionFinished) {
|
|
1083
|
+
this.isSessionFinished = true;
|
|
1084
|
+
this.client.finishSession();
|
|
1085
|
+
this.resolveAllSegmentsSent?.();
|
|
1086
|
+
}
|
|
1087
|
+
return;
|
|
1088
|
+
}
|
|
1089
|
+
this.isSending = true;
|
|
1090
|
+
const segment = this.segmentQueue.shift();
|
|
1091
|
+
this.client.sendText(segment.content);
|
|
1092
|
+
segment.sent = true;
|
|
1093
|
+
this.isSending = false;
|
|
1094
|
+
setTimeout(() => this.processQueue(), 0);
|
|
1095
|
+
}
|
|
1096
|
+
pause() {
|
|
1097
|
+
this.audio.pause();
|
|
1098
|
+
this.updateState({ isPaused: true, isPlaying: false });
|
|
1099
|
+
}
|
|
1100
|
+
resume() {
|
|
1101
|
+
this.audio.play();
|
|
1102
|
+
this.updateState({ isPaused: false, isPlaying: true });
|
|
1103
|
+
}
|
|
1104
|
+
stop() {
|
|
1105
|
+
if (this.client) {
|
|
1106
|
+
this.client.close();
|
|
1107
|
+
this.client = null;
|
|
1108
|
+
}
|
|
1109
|
+
this.audio.pause();
|
|
1110
|
+
this.audio.currentTime = 0;
|
|
1111
|
+
if (this.audioUrl) {
|
|
1112
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
1113
|
+
this.audioUrl = null;
|
|
1114
|
+
}
|
|
1115
|
+
this.stopVisualizationLoop();
|
|
1116
|
+
this.audioContext?.close();
|
|
1117
|
+
this.audioContext = null;
|
|
1118
|
+
this.updateState({
|
|
1119
|
+
isPlaying: false,
|
|
1120
|
+
isPaused: false,
|
|
1121
|
+
isSynthesizing: false,
|
|
1122
|
+
progress: 0,
|
|
1123
|
+
isConnected: false,
|
|
1124
|
+
isSessionStarted: false
|
|
1125
|
+
});
|
|
1126
|
+
}
|
|
1127
|
+
seek(percentage) {
|
|
1128
|
+
let duration = this.audio.duration;
|
|
1129
|
+
if (!isFinite(duration) && this.audio.buffered.length > 0) {
|
|
1130
|
+
duration = this.audio.buffered.end(this.audio.buffered.length - 1);
|
|
1131
|
+
}
|
|
1132
|
+
if (isFinite(duration) && duration > 0) {
|
|
1133
|
+
const time = percentage / 100 * duration;
|
|
1134
|
+
if (isFinite(time)) {
|
|
1135
|
+
this.audio.currentTime = time;
|
|
1136
|
+
this.updateState({ progress: percentage });
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
updateState(partial) {
|
|
1141
|
+
this.state = { ...this.state, ...partial };
|
|
1142
|
+
this.notifyListeners();
|
|
1143
|
+
}
|
|
1144
|
+
subscribe(listener) {
|
|
1145
|
+
this.listeners.add(listener);
|
|
1146
|
+
listener(this.state);
|
|
1147
|
+
return () => this.listeners.delete(listener);
|
|
1148
|
+
}
|
|
1149
|
+
notifyListeners() {
|
|
1150
|
+
this.listeners.forEach((l) => l(this.state));
|
|
1151
|
+
}
|
|
1152
|
+
// Visualization
|
|
1153
|
+
getFrequencyData() {
|
|
1154
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1155
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1156
|
+
this.analyser.getByteFrequencyData(data);
|
|
1157
|
+
return data;
|
|
1158
|
+
}
|
|
1159
|
+
getTimeDomainData() {
|
|
1160
|
+
if (!this.analyser) return new Uint8Array(0);
|
|
1161
|
+
const data = new Uint8Array(this.analyser.frequencyBinCount);
|
|
1162
|
+
this.analyser.getByteTimeDomainData(data);
|
|
1163
|
+
return data;
|
|
1164
|
+
}
|
|
1165
|
+
startVisualizationLoop() {
|
|
1166
|
+
if (!this.config.visualization?.enabled) return;
|
|
949
1167
|
const update = (timestamp) => {
|
|
950
|
-
if (isPlaying && !isPaused) {
|
|
951
|
-
if (timestamp -
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
1168
|
+
if (this.state.isPlaying && !this.state.isPaused) {
|
|
1169
|
+
if (timestamp - this.lastVisUpdate >= (this.config.visualization?.refreshInterval || 0)) {
|
|
1170
|
+
this.updateState({
|
|
1171
|
+
visualizationData: {
|
|
1172
|
+
frequencyData: this.getFrequencyData(),
|
|
1173
|
+
timeDomainData: this.getTimeDomainData()
|
|
1174
|
+
}
|
|
955
1175
|
});
|
|
956
|
-
|
|
1176
|
+
this.lastVisUpdate = timestamp;
|
|
957
1177
|
}
|
|
958
|
-
animId = requestAnimationFrame(update);
|
|
1178
|
+
this.animId = requestAnimationFrame(update);
|
|
959
1179
|
}
|
|
960
1180
|
};
|
|
961
|
-
|
|
962
|
-
|
|
1181
|
+
this.animId = requestAnimationFrame(update);
|
|
1182
|
+
}
|
|
1183
|
+
stopVisualizationLoop() {
|
|
1184
|
+
if (this.animId) {
|
|
1185
|
+
cancelAnimationFrame(this.animId);
|
|
1186
|
+
this.animId = null;
|
|
963
1187
|
}
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
1188
|
+
}
|
|
1189
|
+
};
|
|
1190
|
+
var StreamPlaybackManagerImpl = class {
|
|
1191
|
+
constructor() {
|
|
1192
|
+
this.sessions = /* @__PURE__ */ new Map();
|
|
1193
|
+
this.activeStreamId = null;
|
|
1194
|
+
}
|
|
1195
|
+
/**
|
|
1196
|
+
* 创建新的播放会话
|
|
1197
|
+
*/
|
|
1198
|
+
createSession(id, config) {
|
|
1199
|
+
if (this.activeStreamId && this.activeStreamId !== id) {
|
|
1200
|
+
this.pause(this.activeStreamId);
|
|
1201
|
+
}
|
|
1202
|
+
const session = new PlaybackSession(id, config);
|
|
1203
|
+
this.sessions.set(id, session);
|
|
1204
|
+
this.activeStreamId = id;
|
|
1205
|
+
return session;
|
|
1206
|
+
}
|
|
1207
|
+
/**
|
|
1208
|
+
* 获取会话
|
|
1209
|
+
*/
|
|
1210
|
+
getSession(id) {
|
|
1211
|
+
return this.sessions.get(id);
|
|
1212
|
+
}
|
|
1213
|
+
/**
|
|
1214
|
+
* Stop a session
|
|
1215
|
+
*/
|
|
1216
|
+
stop(id) {
|
|
1217
|
+
const session = this.sessions.get(id);
|
|
1218
|
+
if (session) {
|
|
1219
|
+
session.stop();
|
|
1220
|
+
this.sessions.delete(id);
|
|
1221
|
+
if (this.activeStreamId === id) {
|
|
1222
|
+
this.activeStreamId = null;
|
|
990
1223
|
}
|
|
991
1224
|
}
|
|
992
|
-
}, []);
|
|
993
|
-
return {
|
|
994
|
-
isPlaying,
|
|
995
|
-
isPaused,
|
|
996
|
-
isSynthesizing,
|
|
997
|
-
error,
|
|
998
|
-
play,
|
|
999
|
-
pause,
|
|
1000
|
-
resume,
|
|
1001
|
-
stop,
|
|
1002
|
-
togglePlay,
|
|
1003
|
-
seek,
|
|
1004
|
-
progress,
|
|
1005
|
-
getFrequencyData,
|
|
1006
|
-
getTimeDomainData,
|
|
1007
|
-
visualizationData
|
|
1008
|
-
};
|
|
1009
|
-
}
|
|
1010
|
-
|
|
1011
|
-
// src/tts/useStreamTTS.ts
|
|
1012
|
-
import { WebsocketMSE as WebsocketMSE3 } from "@wq-hook/volcano-sdk/tts";
|
|
1013
|
-
import { useCallback as useCallback4, useEffect as useEffect3, useRef as useRef4, useState as useState4 } from "react";
|
|
1014
|
-
|
|
1015
|
-
// src/tts/StreamingTextSplitter.ts
|
|
1016
|
-
import { MarkdownFormatter as MarkdownFormatter3 } from "@wq-hook/volcano-sdk";
|
|
1017
|
-
import emojiRegex3 from "emoji-regex";
|
|
1018
|
-
var StreamingTextSplitter = class {
|
|
1019
|
-
constructor(options = {}) {
|
|
1020
|
-
/** 当前缓冲区 */
|
|
1021
|
-
this.buffer = "";
|
|
1022
|
-
/** 分段索引计数器 */
|
|
1023
|
-
this.segmentIndex = 0;
|
|
1024
|
-
/** 已完成的分段列表 */
|
|
1025
|
-
this.segments = [];
|
|
1026
|
-
/** 是否已完成 */
|
|
1027
|
-
this.isCompleted = false;
|
|
1028
|
-
this.maxLength = options.maxLength || 150;
|
|
1029
|
-
this.minLength = options.minLength || 10;
|
|
1030
|
-
this.onSegmentComplete = options.onSegmentComplete;
|
|
1031
|
-
this.onAllComplete = options.onAllComplete;
|
|
1032
|
-
}
|
|
1033
|
-
/**
|
|
1034
|
-
* 接收流式文本块
|
|
1035
|
-
* @param chunk - 文本块
|
|
1036
|
-
*/
|
|
1037
|
-
onChunk(chunk) {
|
|
1038
|
-
if (!chunk || this.isCompleted) return;
|
|
1039
|
-
this.buffer += chunk;
|
|
1040
|
-
if (this.detectBoundary(chunk)) {
|
|
1041
|
-
const newlineIndex = this.buffer.indexOf("\n");
|
|
1042
|
-
if (newlineIndex !== -1) {
|
|
1043
|
-
if (newlineIndex === 0) {
|
|
1044
|
-
this.buffer = this.buffer.substring(1);
|
|
1045
|
-
return;
|
|
1046
|
-
}
|
|
1047
|
-
const segmentBuffer = this.buffer.substring(0, newlineIndex);
|
|
1048
|
-
this.buffer = this.buffer.substring(newlineIndex + 1);
|
|
1049
|
-
this.flushSegmentWithBuffer(segmentBuffer);
|
|
1050
|
-
while (this.buffer.includes("\n")) {
|
|
1051
|
-
const nextNewlineIndex = this.buffer.indexOf("\n");
|
|
1052
|
-
if (nextNewlineIndex === 0) {
|
|
1053
|
-
this.buffer = this.buffer.substring(1);
|
|
1054
|
-
continue;
|
|
1055
|
-
}
|
|
1056
|
-
const nextSegmentBuffer = this.buffer.substring(0, nextNewlineIndex);
|
|
1057
|
-
this.buffer = this.buffer.substring(nextNewlineIndex + 1);
|
|
1058
|
-
this.flushSegmentWithBuffer(nextSegmentBuffer);
|
|
1059
|
-
}
|
|
1060
|
-
}
|
|
1061
|
-
}
|
|
1062
|
-
}
|
|
1063
|
-
/**
|
|
1064
|
-
* 检测分段边界
|
|
1065
|
-
* @param chunk - 最新接收的文本块
|
|
1066
|
-
* @returns 是否应该分段
|
|
1067
|
-
*/
|
|
1068
|
-
detectBoundary(chunk) {
|
|
1069
|
-
if (chunk.includes("\n")) {
|
|
1070
|
-
if (this.buffer.length >= this.maxLength) {
|
|
1071
|
-
this.forceSplitAtSentenceBoundary();
|
|
1072
|
-
}
|
|
1073
|
-
return true;
|
|
1074
|
-
}
|
|
1075
|
-
if (this.buffer.length >= this.maxLength) {
|
|
1076
|
-
this.forceSplitAtSentenceBoundary();
|
|
1077
|
-
return true;
|
|
1078
|
-
}
|
|
1079
|
-
return false;
|
|
1080
1225
|
}
|
|
1081
1226
|
/**
|
|
1082
|
-
*
|
|
1227
|
+
* Pause a session
|
|
1083
1228
|
*/
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
const sentenceEnders = /[。?!]/g;
|
|
1087
|
-
let lastMatch = null;
|
|
1088
|
-
let match = null;
|
|
1089
|
-
while ((match = sentenceEnders.exec(content)) !== null) {
|
|
1090
|
-
lastMatch = match;
|
|
1091
|
-
}
|
|
1092
|
-
if (lastMatch && lastMatch.index > this.minLength) {
|
|
1093
|
-
const splitPoint = lastMatch.index + 1;
|
|
1094
|
-
const firstPart = content.substring(0, splitPoint);
|
|
1095
|
-
const secondPart = content.substring(splitPoint);
|
|
1096
|
-
this.buffer = firstPart;
|
|
1097
|
-
this.flushSegment();
|
|
1098
|
-
this.buffer = secondPart;
|
|
1099
|
-
} else {
|
|
1100
|
-
const midPoint = Math.floor(content.length / 2);
|
|
1101
|
-
const firstPart = content.substring(0, midPoint);
|
|
1102
|
-
const secondPart = content.substring(midPoint);
|
|
1103
|
-
this.buffer = firstPart;
|
|
1104
|
-
this.flushSegment();
|
|
1105
|
-
this.buffer = secondPart;
|
|
1106
|
-
}
|
|
1229
|
+
pause(id) {
|
|
1230
|
+
this.sessions.get(id)?.pause();
|
|
1107
1231
|
}
|
|
1108
1232
|
/**
|
|
1109
|
-
*
|
|
1110
|
-
* @param bufferToFlush - 要分段的缓冲区内容
|
|
1233
|
+
* Resume a session
|
|
1111
1234
|
*/
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
|
|
1116
|
-
const isTooShort = content.length < 3;
|
|
1117
|
-
if (isPureSymbols && isTooShort) {
|
|
1118
|
-
return;
|
|
1119
|
-
}
|
|
1120
|
-
const formattedContent = MarkdownFormatter3.format(content).replace(emojiRegex3(), "");
|
|
1121
|
-
if (!formattedContent) return;
|
|
1122
|
-
let subSegments = [formattedContent];
|
|
1123
|
-
if (formattedContent.length > this.maxLength) {
|
|
1124
|
-
subSegments = this.splitLongSegment(formattedContent);
|
|
1125
|
-
}
|
|
1126
|
-
for (const subSegment of subSegments) {
|
|
1127
|
-
if (!subSegment) continue;
|
|
1128
|
-
const segment = {
|
|
1129
|
-
index: this.segmentIndex++,
|
|
1130
|
-
content: subSegment,
|
|
1131
|
-
length: subSegment.length,
|
|
1132
|
-
sent: false
|
|
1133
|
-
};
|
|
1134
|
-
this.segments.push(segment);
|
|
1135
|
-
this.onSegmentComplete?.(segment);
|
|
1235
|
+
resume(id) {
|
|
1236
|
+
if (this.activeStreamId && this.activeStreamId !== id) {
|
|
1237
|
+
this.pause(this.activeStreamId);
|
|
1136
1238
|
}
|
|
1239
|
+
this.sessions.get(id)?.resume();
|
|
1240
|
+
this.activeStreamId = id;
|
|
1137
1241
|
}
|
|
1138
1242
|
/**
|
|
1139
|
-
*
|
|
1243
|
+
* Register (kept for compatibility with the old API; calling createSession directly is recommended)
|
|
1244
|
+
* To stay compatible with the old useMessageTTS logic some aliases could be kept here, but the hook is being refactored, so the API may change.
|
|
1140
1245
|
*/
|
|
1141
|
-
flushSegment() {
|
|
1142
|
-
const content = this.buffer.trim();
|
|
1143
|
-
if (!content) {
|
|
1144
|
-
this.buffer = "";
|
|
1145
|
-
return;
|
|
1146
|
-
}
|
|
1147
|
-
const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content);
|
|
1148
|
-
const isTooShort = content.length < 3;
|
|
1149
|
-
if (isPureSymbols && isTooShort) {
|
|
1150
|
-
this.buffer = "";
|
|
1151
|
-
return;
|
|
1152
|
-
}
|
|
1153
|
-
const formattedContent = MarkdownFormatter3.format(content).replace(emojiRegex3(), "");
|
|
1154
|
-
if (!formattedContent) {
|
|
1155
|
-
this.buffer = "";
|
|
1156
|
-
return;
|
|
1157
|
-
}
|
|
1158
|
-
let subSegments = [formattedContent];
|
|
1159
|
-
if (formattedContent.length > this.maxLength) {
|
|
1160
|
-
subSegments = this.splitLongSegment(formattedContent);
|
|
1161
|
-
}
|
|
1162
|
-
for (const subSegment of subSegments) {
|
|
1163
|
-
if (!subSegment) continue;
|
|
1164
|
-
const segment = {
|
|
1165
|
-
index: this.segmentIndex++,
|
|
1166
|
-
content: subSegment,
|
|
1167
|
-
length: subSegment.length,
|
|
1168
|
-
sent: false
|
|
1169
|
-
};
|
|
1170
|
-
this.segments.push(segment);
|
|
1171
|
-
this.onSegmentComplete?.(segment);
|
|
1172
|
-
}
|
|
1173
|
-
this.buffer = "";
|
|
1174
|
-
}
|
|
1175
|
-
/**
|
|
1176
|
-
* 拆分超长分段
|
|
1177
|
-
* @param segment - 超长的分段
|
|
1178
|
-
* @returns 拆分后的分段数组
|
|
1179
|
-
*/
|
|
1180
|
-
splitLongSegment(segment) {
|
|
1181
|
-
const result = [];
|
|
1182
|
-
let current = "";
|
|
1183
|
-
for (const char of segment) {
|
|
1184
|
-
current += char;
|
|
1185
|
-
const shouldSplit = /[。?!,,]/.test(char);
|
|
1186
|
-
if (shouldSplit && current.length <= this.maxLength) {
|
|
1187
|
-
result.push(current);
|
|
1188
|
-
current = "";
|
|
1189
|
-
} else if (current.length >= this.maxLength) {
|
|
1190
|
-
result.push(current);
|
|
1191
|
-
current = "";
|
|
1192
|
-
}
|
|
1193
|
-
}
|
|
1194
|
-
if (current) {
|
|
1195
|
-
result.push(current);
|
|
1196
|
-
}
|
|
1197
|
-
return result.filter((s) => s.length > 0);
|
|
1198
|
-
}
|
|
1199
|
-
/**
|
|
1200
|
-
* 完成流式输入
|
|
1201
|
-
* 处理剩余的缓冲区内容
|
|
1202
|
-
*/
|
|
1203
|
-
complete() {
|
|
1204
|
-
if (this.isCompleted) return;
|
|
1205
|
-
this.isCompleted = true;
|
|
1206
|
-
while (this.buffer.includes("\n")) {
|
|
1207
|
-
const newlineIndex = this.buffer.indexOf("\n");
|
|
1208
|
-
if (newlineIndex === 0) {
|
|
1209
|
-
this.buffer = this.buffer.substring(1);
|
|
1210
|
-
continue;
|
|
1211
|
-
}
|
|
1212
|
-
const segmentBuffer = this.buffer.substring(0, newlineIndex);
|
|
1213
|
-
this.buffer = this.buffer.substring(newlineIndex + 1);
|
|
1214
|
-
this.flushSegmentWithBuffer(segmentBuffer);
|
|
1215
|
-
}
|
|
1216
|
-
if (this.buffer.trim()) {
|
|
1217
|
-
this.flushSegment();
|
|
1218
|
-
}
|
|
1219
|
-
this.onAllComplete?.(this.segments);
|
|
1220
|
-
}
|
|
1221
|
-
/**
|
|
1222
|
-
* 重置分段器状态
|
|
1223
|
-
*/
|
|
1224
|
-
reset() {
|
|
1225
|
-
this.buffer = "";
|
|
1226
|
-
this.segmentIndex = 0;
|
|
1227
|
-
this.segments = [];
|
|
1228
|
-
this.isCompleted = false;
|
|
1229
|
-
}
|
|
1230
|
-
/**
|
|
1231
|
-
* 获取当前缓冲区内容
|
|
1232
|
-
*/
|
|
1233
|
-
getBuffer() {
|
|
1234
|
-
return this.buffer;
|
|
1235
|
-
}
|
|
1236
|
-
/**
|
|
1237
|
-
* 获取已分段的列表
|
|
1238
|
-
*/
|
|
1239
|
-
getSegments() {
|
|
1240
|
-
return this.segments;
|
|
1241
|
-
}
|
|
1242
|
-
/**
|
|
1243
|
-
* 获取统计信息
|
|
1244
|
-
*/
|
|
1245
|
-
getStats() {
|
|
1246
|
-
return {
|
|
1247
|
-
bufferLength: this.buffer.length,
|
|
1248
|
-
segmentCount: this.segments.length,
|
|
1249
|
-
totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
|
|
1250
|
-
};
|
|
1251
|
-
}
|
|
1252
1246
|
};
|
|
1247
|
+
// Single StreamPlaybackManagerImpl instance, created once when this module is evaluated.
var StreamPlaybackManager = new StreamPlaybackManagerImpl();
|
|
1253
1248
|
|
|
1254
|
-
// src/tts/
|
|
1255
|
-
var
|
|
1256
|
-
|
|
1257
|
-
var sessionAudioCache = /* @__PURE__ */ new Map();
|
|
1258
|
-
function buildFullUrl3(url, params) {
|
|
1259
|
-
const arr = [];
|
|
1260
|
-
for (const key in params) {
|
|
1261
|
-
if (Object.prototype.hasOwnProperty.call(params, key)) {
|
|
1262
|
-
arr.push(`${key}=${encodeURIComponent(params[key])}`);
|
|
1263
|
-
}
|
|
1249
|
+
// src/tts/Metrics.ts
|
|
1250
|
+
/**
 * Metrics collector that discards everything it is given.
 */
var NoopMetricsCollector = class {
  /**
   * Accept a metric and ignore it.
   *
   * @param {object} _metric - Metric record; unused.
   * @returns {undefined}
   */
  record(_metric) {
    // Intentionally empty: this collector records nothing.
    return undefined;
  }
};
|
|
1254
|
+
|
|
1255
|
+
// src/tts/useMessageTTS.ts
|
|
1256
|
+
function useMessageTTS({
|
|
1268
1257
|
ttsConfig,
|
|
1269
1258
|
audioParams,
|
|
1270
1259
|
autoPlay = true,
|
|
@@ -1273,431 +1262,427 @@ function useStreamTTS({
|
|
|
1273
1262
|
onPlayPause,
|
|
1274
1263
|
onPlayResume,
|
|
1275
1264
|
onPlayEnd,
|
|
1265
|
+
onStop,
|
|
1276
1266
|
onError,
|
|
1267
|
+
fallbackVoice,
|
|
1277
1268
|
visualization,
|
|
1278
|
-
|
|
1269
|
+
streamId: externalStreamId
|
|
1279
1270
|
}) {
|
|
1280
|
-
const
|
|
1281
|
-
const [
|
|
1282
|
-
const [
|
|
1283
|
-
const
|
|
1284
|
-
const [
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1271
|
+
const isSubscriptionMode = !!externalStreamId;
|
|
1272
|
+
const [internalStreamId, setInternalStreamId] = useState3("");
|
|
1273
|
+
const [isSwitchedToIndependent, setIsSwitchedToIndependent] = useState3(false);
|
|
1274
|
+
const streamId = isSwitchedToIndependent ? internalStreamId : externalStreamId || internalStreamId;
|
|
1275
|
+
const [state, setState] = useState3({
|
|
1276
|
+
isPlaying: false,
|
|
1277
|
+
isPaused: false,
|
|
1278
|
+
isSynthesizing: false,
|
|
1279
|
+
progress: 0,
|
|
1280
|
+
visualizationData: {
|
|
1281
|
+
frequencyData: new Uint8Array(0),
|
|
1282
|
+
timeDomainData: new Uint8Array(0)
|
|
1283
|
+
},
|
|
1284
|
+
error: null,
|
|
1285
|
+
isConnected: false,
|
|
1286
|
+
isSessionStarted: false,
|
|
1287
|
+
isStreamFinished: false
|
|
1291
1288
|
});
|
|
1292
|
-
const
|
|
1293
|
-
const
|
|
1294
|
-
const
|
|
1295
|
-
const
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
const isStreamFinishedRef = useRef4(false);
|
|
1308
|
-
const isSessionFinishedRef = useRef4(false);
|
|
1309
|
-
const resolveAllSegmentsSentRef = useRef4(null);
|
|
1310
|
-
const currentVoiceRef = useRef4("");
|
|
1311
|
-
const initAudioContext = useCallback4(() => {
|
|
1312
|
-
if (!audioRef.current) return;
|
|
1313
|
-
if (!audioContextRef.current) {
|
|
1314
|
-
const AudioContextClass = window.AudioContext || window.webkitAudioContext;
|
|
1315
|
-
audioContextRef.current = new AudioContextClass();
|
|
1316
|
-
}
|
|
1317
|
-
if (audioContextRef.current.state === "suspended") {
|
|
1318
|
-
audioContextRef.current.resume();
|
|
1319
|
-
}
|
|
1320
|
-
if (!analyserRef.current) {
|
|
1321
|
-
analyserRef.current = audioContextRef.current.createAnalyser();
|
|
1322
|
-
analyserRef.current.fftSize = visualization?.fftSize || 256;
|
|
1323
|
-
}
|
|
1324
|
-
if (!sourceRef.current) {
|
|
1325
|
-
try {
|
|
1326
|
-
sourceRef.current = audioContextRef.current.createMediaElementSource(audioRef.current);
|
|
1327
|
-
sourceRef.current.connect(analyserRef.current);
|
|
1328
|
-
analyserRef.current.connect(audioContextRef.current.destination);
|
|
1329
|
-
} catch (e) {
|
|
1330
|
-
}
|
|
1289
|
+
const [error, setErrorState] = useState3(null);
|
|
1290
|
+
const isFallbackRef = useRef3(false);
|
|
1291
|
+
const fallbackUtteranceRef = useRef3(null);
|
|
1292
|
+
const currentTextRef = useRef3("");
|
|
1293
|
+
useEffect2(() => {
|
|
1294
|
+
if (!streamId) return;
|
|
1295
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1296
|
+
if (session) {
|
|
1297
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1298
|
+
setState(newState);
|
|
1299
|
+
if (newState.error) setErrorState(newState.error);
|
|
1300
|
+
});
|
|
1301
|
+
return () => {
|
|
1302
|
+
unsubscribe();
|
|
1303
|
+
};
|
|
1331
1304
|
}
|
|
1332
|
-
}, [
|
|
1333
|
-
const
|
|
1334
|
-
if (
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
audioRef.current.onerror = null;
|
|
1340
|
-
audioRef.current.onended = null;
|
|
1341
|
-
audioRef.current.onpause = null;
|
|
1342
|
-
audioRef.current.onplay = null;
|
|
1343
|
-
audioRef.current.ontimeupdate = null;
|
|
1344
|
-
audioRef.current.pause();
|
|
1345
|
-
audioRef.current.src = "";
|
|
1346
|
-
audioRef.current = null;
|
|
1347
|
-
}
|
|
1348
|
-
if (sourceRef.current) {
|
|
1349
|
-
try {
|
|
1350
|
-
sourceRef.current.disconnect();
|
|
1351
|
-
} catch (e) {
|
|
1305
|
+
}, [streamId]);
|
|
1306
|
+
const stop = useCallback3(() => {
|
|
1307
|
+
if (streamId) {
|
|
1308
|
+
StreamPlaybackManager.stop(streamId);
|
|
1309
|
+
if (!isSubscriptionMode || isSwitchedToIndependent) {
|
|
1310
|
+
setInternalStreamId("");
|
|
1311
|
+
setIsSwitchedToIndependent(false);
|
|
1352
1312
|
}
|
|
1353
|
-
sourceRef.current = null;
|
|
1354
1313
|
}
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
if (id !== instanceId) {
|
|
1359
|
-
instance.pause();
|
|
1360
|
-
}
|
|
1361
|
-
});
|
|
1362
|
-
}, [instanceId]);
|
|
1363
|
-
const pause = useCallback4(() => {
|
|
1364
|
-
if (audioRef.current) {
|
|
1365
|
-
audioRef.current.pause();
|
|
1314
|
+
if (fallbackUtteranceRef.current) {
|
|
1315
|
+
window.speechSynthesis.cancel();
|
|
1316
|
+
fallbackUtteranceRef.current = null;
|
|
1366
1317
|
}
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
if (
|
|
1383
|
-
|
|
1318
|
+
isFallbackRef.current = false;
|
|
1319
|
+
setState((prev) => ({
|
|
1320
|
+
...prev,
|
|
1321
|
+
isPlaying: false,
|
|
1322
|
+
isPaused: false,
|
|
1323
|
+
isSynthesizing: false,
|
|
1324
|
+
progress: 0
|
|
1325
|
+
}));
|
|
1326
|
+
onStop?.();
|
|
1327
|
+
}, [streamId, isSubscriptionMode, isSwitchedToIndependent, onStop]);
|
|
1328
|
+
const pause = useCallback3(() => {
|
|
1329
|
+
if (isFallbackRef.current) {
|
|
1330
|
+
window.speechSynthesis.pause();
|
|
1331
|
+
setState((prev) => ({ ...prev, isPaused: true, isPlaying: false }));
|
|
1332
|
+
onPlayPause?.();
|
|
1333
|
+
} else if (streamId) {
|
|
1334
|
+
StreamPlaybackManager.pause(streamId);
|
|
1384
1335
|
}
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1336
|
+
}, [streamId, onPlayPause]);
|
|
1337
|
+
const resume = useCallback3(() => {
|
|
1338
|
+
if (isFallbackRef.current) {
|
|
1339
|
+
window.speechSynthesis.resume();
|
|
1340
|
+
setState((prev) => ({ ...prev, isPaused: false, isPlaying: true }));
|
|
1341
|
+
onPlayResume?.();
|
|
1342
|
+
} else if (streamId) {
|
|
1343
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1344
|
+
if (session) {
|
|
1345
|
+
StreamPlaybackManager.resume(streamId);
|
|
1346
|
+
} else {
|
|
1347
|
+
console.log(
|
|
1348
|
+
"[useMessageTTS] Session not found, resetting pause state"
|
|
1349
|
+
);
|
|
1350
|
+
setState((prev) => ({ ...prev, isPaused: false, isPlaying: false }));
|
|
1391
1351
|
}
|
|
1392
|
-
return;
|
|
1393
1352
|
}
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
}, []);
|
|
1402
|
-
const
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
setIsSynthesizing(false);
|
|
1416
|
-
setProgress(0);
|
|
1417
|
-
activeInstances2.delete(instanceId);
|
|
1418
|
-
streamTextRef.current = "";
|
|
1419
|
-
setStreamText("");
|
|
1420
|
-
segmentQueueRef.current = [];
|
|
1421
|
-
isSendingRef.current = false;
|
|
1422
|
-
sessionAudioBuffersRef.current = [];
|
|
1423
|
-
isStreamFinishedRef.current = false;
|
|
1424
|
-
isSessionFinishedRef.current = false;
|
|
1425
|
-
splitterRef.current?.reset();
|
|
1426
|
-
}, [cleanupAudio, instanceId]);
|
|
1427
|
-
const connect = useCallback4(async () => {
|
|
1428
|
-
stop();
|
|
1429
|
-
setErrorState(null);
|
|
1430
|
-
setProgress(0);
|
|
1431
|
-
sessionAudioBuffersRef.current = [];
|
|
1432
|
-
isStreamFinishedRef.current = false;
|
|
1433
|
-
streamTextRef.current = "";
|
|
1434
|
-
setStreamText("");
|
|
1435
|
-
segmentQueueRef.current = [];
|
|
1436
|
-
isSendingRef.current = false;
|
|
1437
|
-
isSessionStartedRef.current = false;
|
|
1438
|
-
calledSessionStartedRef.current = false;
|
|
1439
|
-
setIsSessionStarted(false);
|
|
1440
|
-
const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
|
|
1441
|
-
currentVoiceRef.current = voice;
|
|
1442
|
-
const startTime = Date.now();
|
|
1443
|
-
metricsCollector.record({
|
|
1444
|
-
name: "tts_request",
|
|
1445
|
-
labels: { voice, text_length: 0 },
|
|
1446
|
-
value: 1,
|
|
1447
|
-
timestamp: startTime
|
|
1448
|
-
});
|
|
1449
|
-
try {
|
|
1450
|
-
const audio = new Audio();
|
|
1451
|
-
audio.crossOrigin = "anonymous";
|
|
1452
|
-
audioRef.current = audio;
|
|
1453
|
-
audio.onplay = () => {
|
|
1454
|
-
setIsPlaying(true);
|
|
1455
|
-
setIsPaused(false);
|
|
1353
|
+
}, [streamId, onPlayResume]);
|
|
1354
|
+
const togglePlay = useCallback3(() => {
|
|
1355
|
+
if (state.isPlaying) {
|
|
1356
|
+
pause();
|
|
1357
|
+
} else {
|
|
1358
|
+
resume();
|
|
1359
|
+
}
|
|
1360
|
+
}, [state.isPlaying, pause, resume]);
|
|
1361
|
+
const playFallback = useCallback3(
|
|
1362
|
+
(text) => {
|
|
1363
|
+
console.warn("[useMessageTTS] Switching to fallback TTS");
|
|
1364
|
+
stop();
|
|
1365
|
+
isFallbackRef.current = true;
|
|
1366
|
+
setErrorState(null);
|
|
1367
|
+
const utterance = new SpeechSynthesisUtterance(text);
|
|
1368
|
+
utterance.rate = audioParams?.speech_rate || 1;
|
|
1369
|
+
const voices = window.speechSynthesis.getVoices();
|
|
1370
|
+
const zhVoice = voices.find((v) => v.lang.includes("zh"));
|
|
1371
|
+
if (zhVoice) utterance.voice = zhVoice;
|
|
1372
|
+
utterance.onstart = () => {
|
|
1373
|
+
setState((prev) => ({ ...prev, isPlaying: true, isPaused: false }));
|
|
1456
1374
|
onPlayStart?.();
|
|
1457
|
-
initAudioContext();
|
|
1458
|
-
activeInstances2.set(instanceId, { pause });
|
|
1459
1375
|
};
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1376
|
+
utterance.onend = () => {
|
|
1377
|
+
setState((prev) => ({
|
|
1378
|
+
...prev,
|
|
1379
|
+
isPlaying: false,
|
|
1380
|
+
isPaused: false,
|
|
1381
|
+
progress: 100
|
|
1382
|
+
}));
|
|
1463
1383
|
onPlayEnd?.();
|
|
1464
|
-
activeInstances2.delete(instanceId);
|
|
1465
1384
|
};
|
|
1466
|
-
|
|
1467
|
-
console.error("[
|
|
1468
|
-
setErrorState(
|
|
1469
|
-
onError?.(new Error(
|
|
1470
|
-
};
|
|
1471
|
-
audio.ontimeupdate = () => {
|
|
1472
|
-
let duration = audio.duration;
|
|
1473
|
-
if (!isFinite(duration) && audio.buffered.length > 0) {
|
|
1474
|
-
duration = audio.buffered.end(audio.buffered.length - 1);
|
|
1475
|
-
}
|
|
1476
|
-
if (isFinite(duration) && duration > 0) {
|
|
1477
|
-
setProgress(audio.currentTime / duration * 100);
|
|
1478
|
-
}
|
|
1385
|
+
utterance.onerror = (e) => {
|
|
1386
|
+
console.error("[useMessageTTS] Fallback TTS failed", e);
|
|
1387
|
+
setErrorState("Fallback TTS failed");
|
|
1388
|
+
onError?.(new Error("Fallback TTS failed"));
|
|
1479
1389
|
};
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1390
|
+
fallbackUtteranceRef.current = utterance;
|
|
1391
|
+
window.speechSynthesis.speak(utterance);
|
|
1392
|
+
},
|
|
1393
|
+
[audioParams, onError, onPlayEnd, onPlayStart, stop]
|
|
1394
|
+
);
|
|
1395
|
+
const handleError = useCallback3(
|
|
1396
|
+
(text, failedVoice) => {
|
|
1397
|
+
if (fallbackVoice && failedVoice !== fallbackVoice) {
|
|
1398
|
+
console.warn(
|
|
1399
|
+
`[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
|
|
1400
|
+
);
|
|
1401
|
+
const newId = internalStreamId || `msg-tts-retry-${Date.now()}`;
|
|
1402
|
+
setInternalStreamId(newId);
|
|
1403
|
+
const session = StreamPlaybackManager.createSession(newId, {
|
|
1404
|
+
ttsConfig,
|
|
1405
|
+
audioParams: { ...audioParams, speaker: fallbackVoice },
|
|
1406
|
+
autoPlay,
|
|
1407
|
+
metricsCollector,
|
|
1408
|
+
visualization,
|
|
1409
|
+
onPlayStart,
|
|
1410
|
+
onPlayPause,
|
|
1411
|
+
onPlayResume,
|
|
1412
|
+
onPlayEnd,
|
|
1413
|
+
onError: () => playFallback(text)
|
|
1414
|
+
});
|
|
1415
|
+
session.play(text);
|
|
1416
|
+
} else {
|
|
1417
|
+
playFallback(text);
|
|
1418
|
+
}
|
|
1419
|
+
},
|
|
1420
|
+
[
|
|
1421
|
+
fallbackVoice,
|
|
1422
|
+
playFallback,
|
|
1423
|
+
ttsConfig,
|
|
1424
|
+
audioParams,
|
|
1425
|
+
autoPlay,
|
|
1426
|
+
metricsCollector,
|
|
1427
|
+
visualization,
|
|
1428
|
+
onPlayStart,
|
|
1429
|
+
onPlayPause,
|
|
1430
|
+
onPlayResume,
|
|
1431
|
+
onPlayEnd,
|
|
1432
|
+
internalStreamId
|
|
1433
|
+
]
|
|
1434
|
+
);
|
|
1435
|
+
const play = useCallback3(
|
|
1436
|
+
async (text) => {
|
|
1437
|
+
let shouldSwitchToIndependent = false;
|
|
1438
|
+
if (isSubscriptionMode) {
|
|
1439
|
+
const session2 = StreamPlaybackManager.getSession(externalStreamId || "");
|
|
1440
|
+
if (!session2) {
|
|
1441
|
+
console.log(
|
|
1442
|
+
"[useMessageTTS] Stream session not found, switching to independent play mode"
|
|
1443
|
+
);
|
|
1444
|
+
shouldSwitchToIndependent = true;
|
|
1445
|
+
setIsSwitchedToIndependent(true);
|
|
1446
|
+
} else if (session2.state.isStreamFinished) {
|
|
1447
|
+
console.log(
|
|
1448
|
+
"[useMessageTTS] Stream finished, switching to independent play mode"
|
|
1449
|
+
);
|
|
1450
|
+
shouldSwitchToIndependent = true;
|
|
1451
|
+
setIsSwitchedToIndependent(true);
|
|
1452
|
+
} else if (session2.state.isSynthesizing || session2.state.isPlaying) {
|
|
1453
|
+
console.warn(
|
|
1454
|
+
"[useMessageTTS] play() called in subscription mode while streaming, ignoring"
|
|
1455
|
+
);
|
|
1456
|
+
return;
|
|
1457
|
+
} else {
|
|
1458
|
+
console.log(
|
|
1459
|
+
"[useMessageTTS] Stream not active, switching to independent play mode"
|
|
1460
|
+
);
|
|
1461
|
+
shouldSwitchToIndependent = true;
|
|
1462
|
+
setIsSwitchedToIndependent(true);
|
|
1492
1463
|
}
|
|
1493
|
-
}
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
format: audioParams?.format || "mp3",
|
|
1510
|
-
speech_rate: audioParams?.speech_rate,
|
|
1511
|
-
pitch_rate: audioParams?.pitch_rate,
|
|
1512
|
-
loudness_rate: audioParams?.loudness_rate
|
|
1513
|
-
},
|
|
1514
|
-
additions: JSON.stringify({
|
|
1515
|
-
enable_language_detector: true,
|
|
1516
|
-
disable_markdown_filter: true,
|
|
1517
|
-
enable_latex_tn: true
|
|
1518
|
-
})
|
|
1519
|
-
}
|
|
1520
|
-
},
|
|
1521
|
-
// ===== 关键回调 =====
|
|
1522
|
-
onStart: () => {
|
|
1523
|
-
setIsConnected(true);
|
|
1524
|
-
isConnectedRef.current = true;
|
|
1525
|
-
console.log("[useStreamTTS] WebSocket connected, waiting for text...");
|
|
1526
|
-
},
|
|
1527
|
-
onSessionStarted: () => {
|
|
1528
|
-
setIsSessionStarted(true);
|
|
1529
|
-
isSessionStartedRef.current = true;
|
|
1530
|
-
console.log("[useStreamTTS] Session started, can send text now");
|
|
1531
|
-
if (segmentQueueRef.current.length > 0) {
|
|
1532
|
-
sendNextSegment();
|
|
1533
|
-
}
|
|
1534
|
-
},
|
|
1535
|
-
onMessage: (data) => {
|
|
1536
|
-
setIsSynthesizing(true);
|
|
1537
|
-
if (sessionAudioBuffersRef.current.length === 0) {
|
|
1538
|
-
metricsCollector.record({
|
|
1539
|
-
name: "tts_latency",
|
|
1540
|
-
labels: { stage: "first_packet", voice },
|
|
1541
|
-
value: Date.now() - startTime,
|
|
1542
|
-
timestamp: Date.now()
|
|
1543
|
-
});
|
|
1544
|
-
}
|
|
1545
|
-
const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer;
|
|
1546
|
-
sessionAudioBuffersRef.current.push(buffer);
|
|
1547
|
-
},
|
|
1548
|
-
onSessionFinished: () => {
|
|
1549
|
-
setIsSynthesizing(false);
|
|
1550
|
-
setIsSessionStarted(false);
|
|
1551
|
-
isSessionStartedRef.current = false;
|
|
1552
|
-
calledSessionStartedRef.current = false;
|
|
1553
|
-
if (sessionAudioBuffersRef.current.length > 0 && streamTextRef.current) {
|
|
1554
|
-
const speed = audioParams?.speech_rate || 0;
|
|
1555
|
-
const cacheKey = TTSCache.generateKey(streamTextRef.current, voice, speed);
|
|
1556
|
-
TTSCache.set(cacheKey, [...sessionAudioBuffersRef.current]);
|
|
1557
|
-
sessionAudioCache.set(instanceId, {
|
|
1558
|
-
streamText: streamTextRef.current,
|
|
1559
|
-
audioBuffers: [...sessionAudioBuffersRef.current],
|
|
1560
|
-
timestamp: Date.now(),
|
|
1561
|
-
voice,
|
|
1562
|
-
speed
|
|
1563
|
-
});
|
|
1564
|
-
console.log(`[useStreamTTS] Session finished, cached ${sessionAudioBuffersRef.current.length} audio buffers`);
|
|
1565
|
-
}
|
|
1566
|
-
metricsCollector.record({
|
|
1567
|
-
name: "tts_synthesis_finished",
|
|
1568
|
-
labels: { voice, text_length: streamTextRef.current.length },
|
|
1569
|
-
value: Date.now() - startTime,
|
|
1570
|
-
timestamp: Date.now()
|
|
1571
|
-
});
|
|
1572
|
-
},
|
|
1464
|
+
}
|
|
1465
|
+
currentTextRef.current = text;
|
|
1466
|
+
stop();
|
|
1467
|
+
setErrorState(null);
|
|
1468
|
+
isFallbackRef.current = false;
|
|
1469
|
+
const id = `msg-tts-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1470
|
+
const session = StreamPlaybackManager.createSession(id, {
|
|
1471
|
+
ttsConfig,
|
|
1472
|
+
audioParams,
|
|
1473
|
+
autoPlay,
|
|
1474
|
+
metricsCollector,
|
|
1475
|
+
visualization,
|
|
1476
|
+
onPlayStart,
|
|
1477
|
+
onPlayPause,
|
|
1478
|
+
onPlayResume,
|
|
1479
|
+
onPlayEnd,
|
|
1573
1480
|
onError: (err) => {
|
|
1574
|
-
|
|
1575
|
-
setErrorState(err.msg || "TTS error");
|
|
1576
|
-
onError?.(new Error(err.msg || "TTS error"));
|
|
1577
|
-
setIsSynthesizing(false);
|
|
1481
|
+
handleError(text, audioParams?.speaker || "");
|
|
1578
1482
|
}
|
|
1579
1483
|
});
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1484
|
+
setInternalStreamId(id);
|
|
1485
|
+
await session.play(text);
|
|
1486
|
+
},
|
|
1487
|
+
[
|
|
1488
|
+
isSubscriptionMode,
|
|
1489
|
+
externalStreamId,
|
|
1490
|
+
stop,
|
|
1491
|
+
ttsConfig,
|
|
1492
|
+
audioParams,
|
|
1493
|
+
autoPlay,
|
|
1494
|
+
metricsCollector,
|
|
1495
|
+
visualization,
|
|
1496
|
+
onPlayStart,
|
|
1497
|
+
onPlayPause,
|
|
1498
|
+
onPlayResume,
|
|
1499
|
+
onPlayEnd,
|
|
1500
|
+
handleError
|
|
1501
|
+
]
|
|
1502
|
+
);
|
|
1503
|
+
const seek = useCallback3(
|
|
1504
|
+
(percentage) => {
|
|
1505
|
+
if (streamId) {
|
|
1506
|
+
StreamPlaybackManager.getSession(streamId)?.seek(percentage);
|
|
1588
1507
|
}
|
|
1589
|
-
}
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1508
|
+
},
|
|
1509
|
+
[streamId]
|
|
1510
|
+
);
|
|
1511
|
+
const getFrequencyData = useCallback3(
|
|
1512
|
+
() => state.visualizationData.frequencyData,
|
|
1513
|
+
[state.visualizationData]
|
|
1514
|
+
);
|
|
1515
|
+
const getTimeDomainData = useCallback3(
|
|
1516
|
+
() => state.visualizationData.timeDomainData,
|
|
1517
|
+
[state.visualizationData]
|
|
1518
|
+
);
|
|
1519
|
+
const isStreamActive = !!(externalStreamId && (state.isPlaying || state.isPaused || state.isSynthesizing));
|
|
1520
|
+
const canResume = useCallback3(() => {
|
|
1521
|
+
if (!streamId) return false;
|
|
1522
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1523
|
+
return !!session;
|
|
1524
|
+
}, [streamId]);
|
|
1525
|
+
return {
|
|
1526
|
+
isPlaying: state.isPlaying,
|
|
1527
|
+
isPaused: state.isPaused,
|
|
1528
|
+
isSynthesizing: state.isSynthesizing,
|
|
1529
|
+
progress: state.progress,
|
|
1530
|
+
error,
|
|
1531
|
+
play,
|
|
1532
|
+
pause,
|
|
1533
|
+
resume,
|
|
1534
|
+
stop,
|
|
1535
|
+
togglePlay,
|
|
1536
|
+
seek,
|
|
1537
|
+
getFrequencyData,
|
|
1538
|
+
getTimeDomainData,
|
|
1539
|
+
visualizationData: state.visualizationData,
|
|
1540
|
+
isStreamActive,
|
|
1541
|
+
streamState: state,
|
|
1542
|
+
canResume
|
|
1543
|
+
};
|
|
1544
|
+
}
|
|
1545
|
+
|
|
1546
|
+
// src/tts/useStreamTTS.ts
|
|
1547
|
+
import { useCallback as useCallback4, useEffect as useEffect3, useRef as useRef4, useState as useState4 } from "react";
|
|
1548
|
+
function useStreamTTS({
|
|
1549
|
+
ttsConfig,
|
|
1550
|
+
audioParams,
|
|
1551
|
+
autoPlay = true,
|
|
1552
|
+
metricsCollector = new NoopMetricsCollector(),
|
|
1553
|
+
onPlayStart,
|
|
1554
|
+
onPlayPause,
|
|
1555
|
+
onPlayResume,
|
|
1556
|
+
onPlayEnd,
|
|
1557
|
+
onError,
|
|
1558
|
+
visualization,
|
|
1559
|
+
maxSegmentLength = 150
|
|
1560
|
+
}) {
|
|
1561
|
+
const [streamId, setStreamId] = useState4("");
|
|
1562
|
+
const streamIdRef = useRef4("");
|
|
1563
|
+
const [state, setState] = useState4({
|
|
1564
|
+
isPlaying: false,
|
|
1565
|
+
isPaused: false,
|
|
1566
|
+
isSynthesizing: false,
|
|
1567
|
+
progress: 0,
|
|
1568
|
+
visualizationData: {
|
|
1569
|
+
frequencyData: new Uint8Array(0),
|
|
1570
|
+
timeDomainData: new Uint8Array(0)
|
|
1571
|
+
},
|
|
1572
|
+
error: null,
|
|
1573
|
+
isConnected: false,
|
|
1574
|
+
isSessionStarted: false,
|
|
1575
|
+
isStreamFinished: false
|
|
1576
|
+
});
|
|
1577
|
+
const [streamText, setStreamText] = useState4("");
|
|
1578
|
+
const streamTextRef = useRef4("");
|
|
1579
|
+
const connect = useCallback4(async () => {
|
|
1580
|
+
const newStreamId = `tts-stream-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
1581
|
+
setStreamId(newStreamId);
|
|
1582
|
+
streamIdRef.current = newStreamId;
|
|
1583
|
+
streamTextRef.current = "";
|
|
1584
|
+
setStreamText("");
|
|
1585
|
+
const session = StreamPlaybackManager.createSession(newStreamId, {
|
|
1586
|
+
ttsConfig,
|
|
1587
|
+
audioParams,
|
|
1588
|
+
autoPlay,
|
|
1589
|
+
metricsCollector,
|
|
1590
|
+
visualization,
|
|
1591
|
+
maxSegmentLength,
|
|
1592
|
+
onPlayStart,
|
|
1593
|
+
onPlayPause,
|
|
1594
|
+
onPlayResume,
|
|
1595
|
+
onPlayEnd,
|
|
1596
|
+
onError: (err) => {
|
|
1597
|
+
setState((prev) => ({ ...prev, error: err.message }));
|
|
1598
|
+
onError?.(err);
|
|
1599
|
+
}
|
|
1600
|
+
});
|
|
1601
|
+
await session.connect();
|
|
1602
|
+
return newStreamId;
|
|
1594
1603
|
}, [
|
|
1595
1604
|
ttsConfig,
|
|
1596
1605
|
audioParams,
|
|
1597
1606
|
autoPlay,
|
|
1598
|
-
stop,
|
|
1599
|
-
instanceId,
|
|
1600
|
-
onPlayStart,
|
|
1601
|
-
onPlayEnd,
|
|
1602
|
-
initAudioContext,
|
|
1603
|
-
pause,
|
|
1604
1607
|
metricsCollector,
|
|
1608
|
+
visualization,
|
|
1605
1609
|
maxSegmentLength,
|
|
1606
|
-
|
|
1610
|
+
onPlayStart,
|
|
1611
|
+
onPlayPause,
|
|
1612
|
+
onPlayResume,
|
|
1613
|
+
onPlayEnd,
|
|
1607
1614
|
onError
|
|
1608
1615
|
]);
|
|
1616
|
+
useEffect3(() => {
|
|
1617
|
+
if (!streamId) return;
|
|
1618
|
+
const session = StreamPlaybackManager.getSession(streamId);
|
|
1619
|
+
if (!session) return;
|
|
1620
|
+
const unsubscribe = session.subscribe((newState) => {
|
|
1621
|
+
setState(newState);
|
|
1622
|
+
});
|
|
1623
|
+
return () => {
|
|
1624
|
+
unsubscribe();
|
|
1625
|
+
};
|
|
1626
|
+
}, [streamId]);
|
|
1609
1627
|
const onMessage = useCallback4((chunk) => {
|
|
1610
|
-
if (!
|
|
1628
|
+
if (!streamIdRef.current) return;
|
|
1611
1629
|
streamTextRef.current += chunk;
|
|
1612
1630
|
setStreamText(streamTextRef.current);
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
calledSessionStartedRef.current = true;
|
|
1616
|
-
clientRef.current.startSession();
|
|
1617
|
-
}
|
|
1618
|
-
splitterRef.current?.onChunk(chunk);
|
|
1631
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1632
|
+
session?.handleStreamChunk(chunk);
|
|
1619
1633
|
}, []);
|
|
1620
1634
|
const finishStream = useCallback4(async () => {
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
if (segmentQueueRef.current.length > 0 || isSendingRef.current) {
|
|
1625
|
-
await new Promise((resolve) => {
|
|
1626
|
-
resolveAllSegmentsSentRef.current = resolve;
|
|
1627
|
-
});
|
|
1628
|
-
} else if (clientRef.current && isSessionStartedRef.current && !isSessionFinishedRef.current) {
|
|
1629
|
-
isSessionFinishedRef.current = true;
|
|
1630
|
-
clientRef.current.finishSession();
|
|
1631
|
-
}
|
|
1635
|
+
if (!streamIdRef.current) return;
|
|
1636
|
+
const session = StreamPlaybackManager.getSession(streamIdRef.current);
|
|
1637
|
+
await session?.finishStream();
|
|
1632
1638
|
}, []);
|
|
1633
|
-
const
|
|
1634
|
-
if (
|
|
1635
|
-
|
|
1636
|
-
if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
|
|
1637
|
-
duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
|
|
1638
|
-
}
|
|
1639
|
-
if (isFinite(duration) && duration > 0) {
|
|
1640
|
-
const time = percentage / 100 * duration;
|
|
1641
|
-
if (isFinite(time)) {
|
|
1642
|
-
audioRef.current.currentTime = time;
|
|
1643
|
-
setProgress(percentage);
|
|
1644
|
-
}
|
|
1645
|
-
}
|
|
1639
|
+
const pause = useCallback4(() => {
|
|
1640
|
+
if (streamIdRef.current) {
|
|
1641
|
+
StreamPlaybackManager.pause(streamIdRef.current);
|
|
1646
1642
|
}
|
|
1647
1643
|
}, []);
|
|
1648
|
-
const
|
|
1649
|
-
if (
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
return dataArray;
|
|
1644
|
+
const resume = useCallback4(() => {
|
|
1645
|
+
if (streamIdRef.current) {
|
|
1646
|
+
StreamPlaybackManager.resume(streamIdRef.current);
|
|
1647
|
+
}
|
|
1653
1648
|
}, []);
|
|
1654
|
-
const
|
|
1655
|
-
if (
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1649
|
+
const stop = useCallback4(() => {
|
|
1650
|
+
if (streamIdRef.current) {
|
|
1651
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1652
|
+
setStreamId("");
|
|
1653
|
+
streamIdRef.current = "";
|
|
1654
|
+
}
|
|
1659
1655
|
}, []);
|
|
1660
|
-
|
|
1661
|
-
if (
|
|
1662
|
-
|
|
1663
|
-
let lastUpdate = 0;
|
|
1664
|
-
const interval = visualization.refreshInterval || 0;
|
|
1665
|
-
const update = (timestamp) => {
|
|
1666
|
-
if (isPlaying && !isPaused) {
|
|
1667
|
-
if (timestamp - lastUpdate >= interval) {
|
|
1668
|
-
setVisualizationData({
|
|
1669
|
-
frequencyData: getFrequencyData(),
|
|
1670
|
-
timeDomainData: getTimeDomainData()
|
|
1671
|
-
});
|
|
1672
|
-
lastUpdate = timestamp;
|
|
1673
|
-
}
|
|
1674
|
-
animId = requestAnimationFrame(update);
|
|
1675
|
-
}
|
|
1676
|
-
};
|
|
1677
|
-
if (isPlaying && !isPaused) {
|
|
1678
|
-
animId = requestAnimationFrame(update);
|
|
1656
|
+
const seek = useCallback4((percentage) => {
|
|
1657
|
+
if (streamIdRef.current) {
|
|
1658
|
+
StreamPlaybackManager.getSession(streamIdRef.current)?.seek(percentage);
|
|
1679
1659
|
}
|
|
1680
|
-
|
|
1681
|
-
if (animId) cancelAnimationFrame(animId);
|
|
1682
|
-
};
|
|
1683
|
-
}, [isPlaying, isPaused, visualization, getFrequencyData, getTimeDomainData]);
|
|
1660
|
+
}, []);
|
|
1684
1661
|
useEffect3(() => {
|
|
1685
1662
|
return () => {
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
audioContextRef.current.close();
|
|
1663
|
+
if (streamIdRef.current) {
|
|
1664
|
+
StreamPlaybackManager.stop(streamIdRef.current);
|
|
1689
1665
|
}
|
|
1690
1666
|
};
|
|
1691
|
-
}, [
|
|
1667
|
+
}, []);
|
|
1668
|
+
const getFrequencyData = useCallback4(
|
|
1669
|
+
() => state.visualizationData.frequencyData,
|
|
1670
|
+
[state.visualizationData]
|
|
1671
|
+
);
|
|
1672
|
+
const getTimeDomainData = useCallback4(
|
|
1673
|
+
() => state.visualizationData.timeDomainData,
|
|
1674
|
+
[state.visualizationData]
|
|
1675
|
+
);
|
|
1692
1676
|
return {
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1677
|
+
streamId,
|
|
1678
|
+
isConnected: state.isConnected,
|
|
1679
|
+
isSessionStarted: state.isSessionStarted,
|
|
1680
|
+
isSynthesizing: state.isSynthesizing,
|
|
1681
|
+
isPlaying: state.isPlaying,
|
|
1682
|
+
isPaused: state.isPaused,
|
|
1683
|
+
error: state.error,
|
|
1699
1684
|
streamText,
|
|
1700
|
-
progress,
|
|
1685
|
+
progress: state.progress,
|
|
1701
1686
|
connect,
|
|
1702
1687
|
onMessage,
|
|
1703
1688
|
finishStream,
|
|
@@ -1707,23 +1692,9 @@ function useStreamTTS({
|
|
|
1707
1692
|
seek,
|
|
1708
1693
|
getFrequencyData,
|
|
1709
1694
|
getTimeDomainData,
|
|
1710
|
-
visualizationData
|
|
1695
|
+
visualizationData: state.visualizationData
|
|
1711
1696
|
};
|
|
1712
1697
|
}
|
|
1713
|
-
function getSessionAudioCache(instanceId) {
|
|
1714
|
-
return sessionAudioCache.get(instanceId);
|
|
1715
|
-
}
|
|
1716
|
-
function clearSessionAudioCache(instanceId) {
|
|
1717
|
-
sessionAudioCache.delete(instanceId);
|
|
1718
|
-
}
|
|
1719
|
-
function findSessionCacheByText(streamText, voice, speed) {
|
|
1720
|
-
for (const entry of sessionAudioCache.values()) {
|
|
1721
|
-
if (entry.streamText === streamText && entry.voice === voice && entry.speed === speed) {
|
|
1722
|
-
return entry;
|
|
1723
|
-
}
|
|
1724
|
-
}
|
|
1725
|
-
return void 0;
|
|
1726
|
-
}
|
|
1727
1698
|
|
|
1728
1699
|
// src/components/AudioWaveVisualizer.tsx
|
|
1729
1700
|
import { useEffect as useEffect4, useRef as useRef5 } from "react";
|
|
@@ -2098,10 +2069,8 @@ var AudioProgressBar_default = AudioProgressBar;
|
|
|
2098
2069
|
export {
|
|
2099
2070
|
AudioProgressBar_default as AudioProgressBar,
|
|
2100
2071
|
AudioWaveVisualizer_default as AudioWaveVisualizer,
|
|
2072
|
+
StreamPlaybackManager,
|
|
2101
2073
|
StreamingTextSplitter,
|
|
2102
|
-
clearSessionAudioCache,
|
|
2103
|
-
findSessionCacheByText,
|
|
2104
|
-
getSessionAudioCache,
|
|
2105
2074
|
splitTextByDelimiters,
|
|
2106
2075
|
useMessageTTS,
|
|
2107
2076
|
useStreamTTS,
|