npm - @wq-hook/volcano-react - Versions diffs - 1.0.0 → 1.0.2 - Mend

@wq-hook/volcano-react 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js CHANGED Viewed

@@ -32,8 +32,13 @@ var index_exports = {};
 __export(index_exports, {
   AudioProgressBar: () => AudioProgressBar_default,
   AudioWaveVisualizer: () => AudioWaveVisualizer_default,
+  StreamingTextSplitter: () => StreamingTextSplitter,
+  clearSessionAudioCache: () => clearSessionAudioCache,
+  findSessionCacheByText: () => findSessionCacheByText,
+  getSessionAudioCache: () => getSessionAudioCache,
   splitTextByDelimiters: () => splitTextByDelimiters,
   useMessageTTS: () => useMessageTTS,
+  useStreamTTS: () => useStreamTTS,
   useVolcanoASR: () => useVolcanoASR,
   useVolcanoTTS: () => useVolcanoTTS
 });
@@ -550,10 +555,12 @@ function useMessageTTS({
   const [isSynthesizing, setIsSynthesizing] = (0, import_react3.useState)(false);
   const [error, setErrorState] = (0, import_react3.useState)(null);
   const [progress, setProgress] = (0, import_react3.useState)(0);
-  const [visualizationData, setVisualizationData] = (0, import_react3.useState)({
-    frequencyData: new Uint8Array(0),
-    timeDomainData: new Uint8Array(0)
-  });
+  const [visualizationData, setVisualizationData] = (0, import_react3.useState)(
+    {
+      frequencyData: new Uint8Array(0),
+      timeDomainData: new Uint8Array(0)
+    }
+  );
   const instanceId = (0, import_react3.useRef)(
     `tts-${Date.now()}-${Math.random().toString(36).slice(2)}`
   ).current;
@@ -761,7 +768,11 @@ function useMessageTTS({
           console.error("Audio playback error:", e, audio.error);
           metricsCollector.record({
             name: "tts_error",
-            labels: { error_code: "playback_error", voice, detail: audio.error?.message || String(audio.error?.code) },
+            labels: {
+              error_code: "playback_error",
+              voice,
+              detail: audio.error?.message || String(audio.error?.code)
+            },
             value: 1,
             timestamp: Date.now()
           });
@@ -779,7 +790,10 @@ function useMessageTTS({
           }
         };
         if (cachedData) {
-          const totalSize = cachedData.reduce((acc, buf) => acc + buf.byteLength, 0);
+          const totalSize = cachedData.reduce(
+            (acc, buf) => acc + buf.byteLength,
+            0
+          );
           metricsCollector.record({
             name: "tts_cache_hit",
             labels: { voice, speed },
@@ -797,7 +811,9 @@ function useMessageTTS({
             })
           );
           if (totalSize === 0) {
-            console.warn("[useMessageTTS] Cached data is empty, falling back to stream");
+            console.warn(
+              "[useMessageTTS] Cached data is empty, falling back to stream"
+            );
           } else {
             const blob = new Blob(cachedData, { type: "audio/mpeg" });
             const url2 = URL.createObjectURL(blob);
@@ -816,7 +832,10 @@ function useMessageTTS({
         }
         console.log("[useMessageTTS] Cache miss, starting stream");
         clientRef.current = (0, import_tts2.WebsocketMSE)({ autoStartSession: true });
-        const formattedText = import_volcano_sdk2.MarkdownFormatter.format(text).replace((0, import_emoji_regex2.default)(), "");
+        const formattedText = import_volcano_sdk2.MarkdownFormatter.format(text).replace(
+          (0, import_emoji_regex2.default)(),
+          ""
+        );
         const segments = splitTextByDelimiters(formattedText);
         const url = clientRef.current.start({
           url: buildFullUrl2(WS_URL, {
@@ -934,6 +953,14 @@ function useMessageTTS({
         console.warn(
           `[useMessageTTS] Voice ${failedVoice} failed, switching to fallback voice ${fallbackVoice}`
         );
+        if (clientRef.current) {
+          clientRef.current.close();
+          clientRef.current = null;
+        }
+        if (audioRef.current) {
+          audioRef.current.pause();
+          audioRef.current = null;
+        }
         executeTTS(text, fallbackVoice);
       } else {
         playFallback(text);
@@ -943,7 +970,7 @@ function useMessageTTS({
   );
   const play = (0, import_react3.useCallback)(
     (text) => {
-      const voice = audioParams?.speaker || "default";
+      const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
       return executeTTS(text, voice);
     },
     [audioParams, executeTTS]
@@ -996,7 +1023,9 @@ function useMessageTTS({
     if (audioRef.current) {
       let duration = audioRef.current.duration;
       if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
-        duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
+        duration = audioRef.current.buffered.end(
+          audioRef.current.buffered.length - 1
+        );
       }
       if (isFinite(duration) && duration > 0) {
         const time = percentage / 100 * duration;
@@ -1025,8 +1054,725 @@ function useMessageTTS({
   };
 }
-// src/components/AudioWaveVisualizer.tsx
+// src/tts/useStreamTTS.ts
+var import_tts3 = require("@wq-hook/volcano-sdk/tts");
 var import_react4 = require("react");
+// src/tts/StreamingTextSplitter.ts
+var import_volcano_sdk3 = require("@wq-hook/volcano-sdk");
+var import_emoji_regex3 = __toESM(require("emoji-regex"));
+var StreamingTextSplitter = class { // Incrementally splits streamed text into segments and reports each one through callbacks.
+  constructor(options = {}) {
+    /** Current buffer: text received but not yet emitted as a segment */
+    this.buffer = "";
+    /** Segment index counter */
+    this.segmentIndex = 0;
+    /** List of completed segments */
+    this.segments = [];
+    /** Whether complete() has already been called */
+    this.isCompleted = false;
+    this.maxLength = options.maxLength || 150; // any falsy value (including 0) falls back to 150
+    this.minLength = options.minLength || 10; // any falsy value (including 0) falls back to 10
+    this.onSegmentComplete = options.onSegmentComplete; // optional; invoked once per emitted segment
+    this.onAllComplete = options.onAllComplete; // optional; invoked by complete() with the full segment list
+  }
+  /**
+   * Receive a streamed text chunk, append it to the buffer and emit newline-terminated segments.
+   * @param chunk - text chunk; ignored when falsy or once complete() has been called
+   */
+  onChunk(chunk) {
+    if (!chunk || this.isCompleted) return;
+    this.buffer += chunk;
+    if (this.detectBoundary(chunk)) { // detectBoundary may already have flushed part of the buffer
+      const newlineIndex = this.buffer.indexOf("\n");
+      if (newlineIndex !== -1) {
+        if (newlineIndex === 0) {
+          this.buffer = this.buffer.substring(1); // drop the leading newline
+          return; // later newlines in the buffer are left for a subsequent chunk or complete()
+        }
+        const segmentBuffer = this.buffer.substring(0, newlineIndex);
+        this.buffer = this.buffer.substring(newlineIndex + 1);
+        this.flushSegmentWithBuffer(segmentBuffer);
+        while (this.buffer.includes("\n")) { // emit every remaining newline-terminated line
+          const nextNewlineIndex = this.buffer.indexOf("\n");
+          if (nextNewlineIndex === 0) {
+            this.buffer = this.buffer.substring(1); // skip empty lines
+            continue;
+          }
+          const nextSegmentBuffer = this.buffer.substring(0, nextNewlineIndex);
+          this.buffer = this.buffer.substring(nextNewlineIndex + 1);
+          this.flushSegmentWithBuffer(nextSegmentBuffer);
+        }
+      }
+    }
+  }
+  /**
+   * Detect a segment boundary; as a side effect, force-splits the buffer once it reaches maxLength.
+   * @param chunk - the most recently received text chunk
+   * @returns whether the caller should attempt to split (newline seen or maxLength reached)
+   */
+  detectBoundary(chunk) {
+    if (chunk.includes("\n")) {
+      if (this.buffer.length >= this.maxLength) {
+        this.forceSplitAtSentenceBoundary();
+      }
+      return true;
+    }
+    if (this.buffer.length >= this.maxLength) {
+      this.forceSplitAtSentenceBoundary();
+      return true;
+    }
+    return false;
+  }
+  /**
+   * Force-split an overlong buffer at a sentence boundary; falls back to the midpoint when none qualifies.
+   */
+  forceSplitAtSentenceBoundary() {
+    const content = this.buffer;
+    const sentenceEnders = /[。？！]/g; // only these three full-width terminators are matched
+    let lastMatch = null;
+    let match = null;
+    while ((match = sentenceEnders.exec(content)) !== null) { // keep the last terminator found
+      lastMatch = match;
+    }
+    if (lastMatch && lastMatch.index > this.minLength) {
+      const splitPoint = lastMatch.index + 1; // split just after the terminator
+      const firstPart = content.substring(0, splitPoint);
+      const secondPart = content.substring(splitPoint);
+      this.buffer = firstPart; // flushSegment() reads this.buffer, so stage the first part there
+      this.flushSegment();
+      this.buffer = secondPart;
+    } else {
+      const midPoint = Math.floor(content.length / 2); // NOTE(review): index is in UTF-16 units; could fall inside a surrogate pair - confirm acceptable
+      const firstPart = content.substring(0, midPoint);
+      const secondPart = content.substring(midPoint);
+      this.buffer = firstPart;
+      this.flushSegment();
+      this.buffer = secondPart;
+    }
+  }
+  /**
+   * Emit the given text as one or more segments without touching this.buffer (the text is not trimmed).
+   * @param bufferToFlush - the text to turn into segment(s)
+   */
+  flushSegmentWithBuffer(bufferToFlush) {
+    const content = bufferToFlush;
+    if (!content) return;
+    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content); // true when there is no Unicode letter or number
+    const isTooShort = content.length < 3;
+    if (isPureSymbols && isTooShort) { // drop fragments made of only one or two symbol characters
+      return;
+    }
+    const formattedContent = import_volcano_sdk3.MarkdownFormatter.format(content).replace((0, import_emoji_regex3.default)(), "");
+    if (!formattedContent) return;
+    let subSegments = [formattedContent];
+    if (formattedContent.length > this.maxLength) {
+      subSegments = this.splitLongSegment(formattedContent);
+    }
+    for (const subSegment of subSegments) {
+      if (!subSegment) continue;
+      const segment = {
+        index: this.segmentIndex++,
+        content: subSegment,
+        length: subSegment.length,
+        sent: false
+      };
+      this.segments.push(segment);
+      this.onSegmentComplete?.(segment);
+    }
+  }
+  /**
+   * Emit the trimmed content of this.buffer as one or more segments; the buffer is cleared on every path.
+   */
+  flushSegment() {
+    const content = this.buffer.trim();
+    if (!content) {
+      this.buffer = "";
+      return;
+    }
+    const isPureSymbols = /^[^\p{L}\p{N}]*$/u.test(content); // true when there is no Unicode letter or number
+    const isTooShort = content.length < 3;
+    if (isPureSymbols && isTooShort) {
+      this.buffer = "";
+      return;
+    }
+    const formattedContent = import_volcano_sdk3.MarkdownFormatter.format(content).replace((0, import_emoji_regex3.default)(), "");
+    if (!formattedContent) {
+      this.buffer = "";
+      return;
+    }
+    let subSegments = [formattedContent];
+    if (formattedContent.length > this.maxLength) {
+      subSegments = this.splitLongSegment(formattedContent);
+    }
+    for (const subSegment of subSegments) {
+      if (!subSegment) continue;
+      const segment = {
+        index: this.segmentIndex++,
+        content: subSegment,
+        length: subSegment.length,
+        sent: false
+      };
+      this.segments.push(segment);
+      this.onSegmentComplete?.(segment);
+    }
+    this.buffer = "";
+  }
+  /**
+   * Split an overlong segment after punctuation, or once the accumulated piece reaches maxLength.
+   * @param segment - the overlong segment text
+   * @returns array of non-empty pieces in original order
+   */
+  splitLongSegment(segment) {
+    const result = [];
+    let current = "";
+    for (const char of segment) { // iterates by code point, while .length below counts UTF-16 units
+      current += char;
+      const shouldSplit = /[。？！，,]/.test(char);
+      if (shouldSplit && current.length <= this.maxLength) {
+        result.push(current);
+        current = "";
+      } else if (current.length >= this.maxLength) { // hard cut when no punctuation was seen in time
+        result.push(current);
+        current = "";
+      }
+    }
+    if (current) {
+      result.push(current);
+    }
+    return result.filter((s) => s.length > 0);
+  }
+  /**
+   * Complete the streamed input; calls after the first are ignored.
+   * Processes the remaining buffer content, then calls onAllComplete with all segments.
+   */
+  complete() {
+    if (this.isCompleted) return;
+    this.isCompleted = true;
+    while (this.buffer.includes("\n")) {
+      const newlineIndex = this.buffer.indexOf("\n");
+      if (newlineIndex === 0) {
+        this.buffer = this.buffer.substring(1);
+        continue;
+      }
+      const segmentBuffer = this.buffer.substring(0, newlineIndex);
+      this.buffer = this.buffer.substring(newlineIndex + 1);
+      this.flushSegmentWithBuffer(segmentBuffer);
+    }
+    if (this.buffer.trim()) { // whatever is left has no newline; emit it as the final segment(s)
+      this.flushSegment();
+    }
+    this.onAllComplete?.(this.segments);
+  }
+  /**
+   * Reset the splitter state (buffer, index, segments, completed flag); length limits and callbacks are kept.
+   */
+  reset() {
+    this.buffer = "";
+    this.segmentIndex = 0;
+    this.segments = [];
+    this.isCompleted = false;
+  }
+  /**
+   * Get the current buffer content.
+   */
+  getBuffer() {
+    return this.buffer;
+  }
+  /**
+   * Get the list of segments produced so far (the internal array itself, not a copy).
+   */
+  getSegments() {
+    return this.segments;
+  }
+  /**
+   * Get statistics: buffer length, segment count and the sum of all segment lengths.
+   */
+  getStats() {
+    return {
+      bufferLength: this.buffer.length,
+      segmentCount: this.segments.length,
+      totalChars: this.segments.reduce((sum, seg) => sum + seg.length, 0)
+    };
+  }
+};
+// src/tts/useStreamTTS.ts
+var WS_URL2 = "wss://openspeech.bytedance.com/api/v3/tts/bidirection"; // WebSocket endpoint passed to buildFullUrl3 in useStreamTTS.connect
+var activeInstances2 = /* @__PURE__ */ new Map(); // instanceId -> { pause }; lets one hook instance pause the others
+var sessionAudioCache = /* @__PURE__ */ new Map(); // instanceId -> { streamText, audioBuffers, timestamp, voice, speed }
+function buildFullUrl3(url, params) { // Returns `url` followed by "?" and the own enumerable properties of `params` as key=value pairs joined by "&".
+  const arr = [];
+  for (const key in params) {
+    if (Object.prototype.hasOwnProperty.call(params, key)) { // skip inherited properties
+      arr.push(`${key}=${encodeURIComponent(params[key])}`); // only the value is URI-encoded; the key is used as-is
+    }
+  }
+  return `${url}?${arr.join("&")}`; // the "?" is appended even when no parameters were collected
+}
+function useStreamTTS({ // React hook: feeds streamed text to a WebSocket TTS client in segments and plays the result through an Audio element.
+  ttsConfig,
+  audioParams,
+  autoPlay = true,
+  metricsCollector = new NoopMetricsCollector(), // NOTE(review): NoopMetricsCollector is not defined in the visible diff; this default builds a new object on each call that omits it, and connect lists it as a dependency
+  onPlayStart,
+  onPlayPause,
+  onPlayResume,
+  onPlayEnd,
+  onError,
+  visualization,
+  maxSegmentLength = 150
+}) {
+  const [isConnected, setIsConnected] = (0, import_react4.useState)(false);
+  const [isSessionStarted, setIsSessionStarted] = (0, import_react4.useState)(false);
+  const [isSynthesizing, setIsSynthesizing] = (0, import_react4.useState)(false);
+  const [isPlaying, setIsPlaying] = (0, import_react4.useState)(false);
+  const [isPaused, setIsPaused] = (0, import_react4.useState)(false);
+  const [error, setErrorState] = (0, import_react4.useState)(null);
+  const [streamText, setStreamText] = (0, import_react4.useState)("");
+  const [progress, setProgress] = (0, import_react4.useState)(0);
+  const [visualizationData, setVisualizationData] = (0, import_react4.useState)({
+    frequencyData: new Uint8Array(0),
+    timeDomainData: new Uint8Array(0)
+  });
+  const instanceId = (0, import_react4.useRef)(`tts-stream-${Date.now()}-${Math.random().toString(36).slice(2)}`).current; // read back from the ref, so the id stays the same across renders
+  const clientRef = (0, import_react4.useRef)(null);
+  const audioRef = (0, import_react4.useRef)(null);
+  const audioContextRef = (0, import_react4.useRef)(null);
+  const analyserRef = (0, import_react4.useRef)(null);
+  const sourceRef = (0, import_react4.useRef)(null);
+  const audioUrlRef = (0, import_react4.useRef)(null);
+  const streamTextRef = (0, import_react4.useRef)(""); // accumulated text of the current stream
+  const isConnectedRef = (0, import_react4.useRef)(false); // ref mirrors of state, readable from callbacks with empty deps
+  const isSessionStartedRef = (0, import_react4.useRef)(false);
+  const calledSessionStartedRef = (0, import_react4.useRef)(false); // set once startSession() has been requested
+  const splitterRef = (0, import_react4.useRef)(null);
+  const segmentQueueRef = (0, import_react4.useRef)([]); // segments waiting to be sent
+  const isSendingRef = (0, import_react4.useRef)(false);
+  const sessionAudioBuffersRef = (0, import_react4.useRef)([]); // copies of the audio packets received in this session
+  const isStreamFinishedRef = (0, import_react4.useRef)(false); // set by finishStream()
+  const isSessionFinishedRef = (0, import_react4.useRef)(false); // set once finishSession() has been called
+  const resolveAllSegmentsSentRef = (0, import_react4.useRef)(null); // resolver of the promise awaited in finishStream()
+  const currentVoiceRef = (0, import_react4.useRef)("");
+  const initAudioContext = (0, import_react4.useCallback)(() => { // lazily builds AudioContext -> analyser -> destination for the current audio element
+    if (!audioRef.current) return;
+    if (!audioContextRef.current) {
+      const AudioContextClass = window.AudioContext || window.webkitAudioContext;
+      audioContextRef.current = new AudioContextClass();
+    }
+    if (audioContextRef.current.state === "suspended") {
+      audioContextRef.current.resume();
+    }
+    if (!analyserRef.current) {
+      analyserRef.current = audioContextRef.current.createAnalyser();
+      analyserRef.current.fftSize = visualization?.fftSize || 256;
+    }
+    if (!sourceRef.current) {
+      try {
+        sourceRef.current = audioContextRef.current.createMediaElementSource(audioRef.current);
+        sourceRef.current.connect(analyserRef.current);
+        analyserRef.current.connect(audioContextRef.current.destination);
+      } catch (e) { // NOTE(review): any error while creating or connecting the source node is silently ignored
+      }
+    }
+  }, [visualization?.fftSize]);
+  const cleanupAudio = (0, import_react4.useCallback)(() => { // revokes the audio URL, detaches handlers, and drops the audio element and source node
+    if (audioUrlRef.current) {
+      URL.revokeObjectURL(audioUrlRef.current);
+      audioUrlRef.current = null;
+    }
+    if (audioRef.current) {
+      audioRef.current.onerror = null; // handlers are cleared before pause() and the src reset below
+      audioRef.current.onended = null;
+      audioRef.current.onpause = null;
+      audioRef.current.onplay = null;
+      audioRef.current.ontimeupdate = null;
+      audioRef.current.pause();
+      audioRef.current.src = "";
+      audioRef.current = null;
+    }
+    if (sourceRef.current) {
+      try {
+        sourceRef.current.disconnect();
+      } catch (e) { // disconnect failures are ignored
+      }
+      sourceRef.current = null;
+    }
+  }, []);
+  const stopOthers = (0, import_react4.useCallback)(() => { // pauses every other registered instance
+    activeInstances2.forEach((instance, id) => {
+      if (id !== instanceId) {
+        instance.pause();
+      }
+    });
+  }, [instanceId]);
+  const pause = (0, import_react4.useCallback)(() => {
+    if (audioRef.current) {
+      audioRef.current.pause();
+    }
+    setIsPaused(true); // state and callback fire even when there is no audio element
+    setIsPlaying(false);
+    onPlayPause?.();
+  }, [onPlayPause]);
+  const resume = (0, import_react4.useCallback)(() => {
+    stopOthers();
+    if (audioRef.current) {
+      audioRef.current.play(); // NOTE(review): the return value of play() is not awaited or caught here
+    }
+    setIsPaused(false);
+    setIsPlaying(true);
+    onPlayResume?.();
+    activeInstances2.set(instanceId, { pause });
+  }, [stopOthers, instanceId, pause, onPlayResume]);
+  const sendNextSegment = (0, import_react4.useCallback)(() => { // sends one queued segment, then reschedules itself until the queue is empty
+    if (!clientRef.current || !isSessionStartedRef.current || isSendingRef.current || isSessionFinishedRef.current) {
+      return;
+    }
+    if (segmentQueueRef.current.length === 0) {
+      if (isStreamFinishedRef.current && !isSessionFinishedRef.current) { // queue drained after finishStream(): close the session
+        console.log("[useStreamTTS] All segments sent, finishing session");
+        isSessionFinishedRef.current = true;
+        clientRef.current.finishSession();
+        resolveAllSegmentsSentRef.current?.(); // releases a pending finishStream() await, if any
+      }
+      return;
+    }
+    isSendingRef.current = true;
+    const segment = segmentQueueRef.current.shift();
+    console.log(`[useStreamTTS] Sending segment ${segment.index}: ${segment.content.substring(0, 30)}...`);
+    clientRef.current.sendText(segment.content);
+    segment.sent = true;
+    isSendingRef.current = false;
+    setTimeout(() => sendNextSegment(), 0); // continue with the next segment on a later task
+  }, []);
+  const stop = (0, import_react4.useCallback)(() => { // closes the client, tears down audio and resets all state and refs for this instance
+    if (clientRef.current) {
+      clientRef.current.close();
+      clientRef.current = null;
+    }
+    cleanupAudio();
+    setIsConnected(false);
+    isConnectedRef.current = false;
+    setIsSessionStarted(false);
+    isSessionStartedRef.current = false;
+    calledSessionStartedRef.current = false;
+    setIsPlaying(false);
+    setIsPaused(false);
+    setIsSynthesizing(false);
+    setProgress(0);
+    activeInstances2.delete(instanceId);
+    streamTextRef.current = "";
+    setStreamText("");
+    segmentQueueRef.current = [];
+    isSendingRef.current = false;
+    sessionAudioBuffersRef.current = [];
+    isStreamFinishedRef.current = false;
+    isSessionFinishedRef.current = false;
+    splitterRef.current?.reset(); // NOTE(review): resolveAllSegmentsSentRef is neither called nor cleared here; a finishStream() awaiting it looks like it would stay pending - confirm
+  }, [cleanupAudio, instanceId]);
+  const connect = (0, import_react4.useCallback)(async () => { // starts a fresh connection: audio element, TTS client, text splitter and client callbacks
+    stop(); // tear down any previous connection first
+    setErrorState(null);
+    setProgress(0);
+    sessionAudioBuffersRef.current = [];
+    isStreamFinishedRef.current = false;
+    streamTextRef.current = "";
+    setStreamText("");
+    segmentQueueRef.current = [];
+    isSendingRef.current = false;
+    isSessionStartedRef.current = false;
+    calledSessionStartedRef.current = false;
+    setIsSessionStarted(false);
+    const voice = audioParams?.speaker || "zh_female_vv_uranus_bigtts";
+    currentVoiceRef.current = voice;
+    const startTime = Date.now();
+    metricsCollector.record({
+      name: "tts_request",
+      labels: { voice, text_length: 0 }, // no text has been received yet at connect time
+      value: 1,
+      timestamp: startTime
+    });
+    try {
+      const audio = new Audio();
+      audio.crossOrigin = "anonymous";
+      audioRef.current = audio;
+      audio.onplay = () => {
+        setIsPlaying(true);
+        setIsPaused(false);
+        onPlayStart?.();
+        initAudioContext();
+        activeInstances2.set(instanceId, { pause }); // register so other instances can pause this one
+      };
+      audio.onended = () => {
+        setIsPlaying(false);
+        setIsPaused(false);
+        onPlayEnd?.();
+        activeInstances2.delete(instanceId);
+      };
+      audio.onerror = (e) => {
+        console.error("[useStreamTTS] Audio playback error:", e, audio.error);
+        setErrorState(audio.error?.message || "Audio playback error");
+        onError?.(new Error(audio.error?.message || "Audio playback error"));
+      };
+      audio.ontimeupdate = () => {
+        let duration = audio.duration;
+        if (!isFinite(duration) && audio.buffered.length > 0) { // non-finite duration: use the end of the last buffered range instead
+          duration = audio.buffered.end(audio.buffered.length - 1);
+        }
+        if (isFinite(duration) && duration > 0) {
+          setProgress(audio.currentTime / duration * 100);
+        }
+      };
+      clientRef.current = (0, import_tts3.WebsocketMSE)({ autoStartSession: false }); // the session is started later, from onMessage, when the first text arrives
+      splitterRef.current = new StreamingTextSplitter({
+        maxLength: maxSegmentLength, // minLength is not passed, so the splitter's own default applies
+        onSegmentComplete: (segment) => {
+          segmentQueueRef.current.push(segment);
+          console.log(`[useStreamTTS] Segment ${segment.index} queued (${segment.length} chars)`);
+          if (isSessionStartedRef.current) { // otherwise the queue is drained from onSessionStarted
+            sendNextSegment();
+          }
+        },
+        onAllComplete: () => {
+          console.log(`[useStreamTTS] All segments completed, total: ${segmentQueueRef.current.length} in queue`);
+        }
+      });
+      const url = clientRef.current.start({
+        url: buildFullUrl3(WS_URL2, {
+          api_access_key: `Jwt; ${ttsConfig.token}`, // NOTE(review): the token ends up in the URL query string - confirm this is the intended transport
+          api_app_key: ttsConfig.appid,
+          api_resource_id: ttsConfig.resourceId || "seed-tts-2.0"
+        }),
+        config: {
+          user: {
+            uid: `req-${Date.now()}`
+          },
+          namespace: ttsConfig.namespace || "BidirectionalTTS",
+          req_params: {
+            speaker: voice,
+            audio_params: {
+              sample_rate: audioParams?.sample_rate || 24e3,
+              format: audioParams?.format || "mp3",
+              speech_rate: audioParams?.speech_rate,
+              pitch_rate: audioParams?.pitch_rate,
+              loudness_rate: audioParams?.loudness_rate
+            },
+            additions: JSON.stringify({
+              enable_language_detector: true,
+              disable_markdown_filter: true,
+              enable_latex_tn: true
+            })
+          }
+        },
+        // ===== Key callbacks =====
+        onStart: () => {
+          setIsConnected(true);
+          isConnectedRef.current = true;
+          console.log("[useStreamTTS] WebSocket connected, waiting for text...");
+        },
+        onSessionStarted: () => {
+          setIsSessionStarted(true);
+          isSessionStartedRef.current = true;
+          console.log("[useStreamTTS] Session started, can send text now");
+          if (segmentQueueRef.current.length > 0) { // flush segments queued before the session started
+            sendNextSegment();
+          }
+        },
+        onMessage: (data) => {
+          setIsSynthesizing(true);
+          if (sessionAudioBuffersRef.current.length === 0) { // first packet of the session: record latency since connect()
+            metricsCollector.record({
+              name: "tts_latency",
+              labels: { stage: "first_packet", voice },
+              value: Date.now() - startTime,
+              timestamp: Date.now()
+            });
+          }
+          const buffer = data instanceof ArrayBuffer ? data.slice(0) : new Uint8Array(data).buffer; // store a copy rather than the incoming object
+          sessionAudioBuffersRef.current.push(buffer);
+        },
+        onSessionFinished: () => {
+          setIsSynthesizing(false);
+          setIsSessionStarted(false);
+          isSessionStartedRef.current = false;
+          calledSessionStartedRef.current = false;
+          if (sessionAudioBuffersRef.current.length > 0 && streamTextRef.current) { // cache only when both audio and text exist
+            const speed = audioParams?.speech_rate || 0;
+            const cacheKey = TTSCache.generateKey(streamTextRef.current, voice, speed); // TTSCache is not defined in the visible diff
+            TTSCache.set(cacheKey, [...sessionAudioBuffersRef.current]);
+            sessionAudioCache.set(instanceId, { // one entry per hook instance; a later session overwrites it
+              streamText: streamTextRef.current,
+              audioBuffers: [...sessionAudioBuffersRef.current],
+              timestamp: Date.now(),
+              voice,
+              speed
+            });
+            console.log(`[useStreamTTS] Session finished, cached ${sessionAudioBuffersRef.current.length} audio buffers`);
+          }
+          metricsCollector.record({
+            name: "tts_synthesis_finished",
+            labels: { voice, text_length: streamTextRef.current.length },
+            value: Date.now() - startTime,
+            timestamp: Date.now()
+          });
+        },
+        onError: (err) => {
+          console.error("[useStreamTTS] TTS error:", err);
+          setErrorState(err.msg || "TTS error");
+          onError?.(new Error(err.msg || "TTS error"));
+          setIsSynthesizing(false);
+        }
+      });
+      audioUrlRef.current = url; // remembered so cleanupAudio can pass it to URL.revokeObjectURL
+      audio.src = url;
+      if (autoPlay) {
+        try {
+          await audio.play();
+        } catch (e) {
+          console.warn("[useStreamTTS] Autoplay blocked:", e); // a play() failure is only logged, not reported via onError
+        }
+      }
+    } catch (err) {
+      console.error("[useStreamTTS] Connect error:", err);
+      setErrorState(String(err));
+      onError?.(err instanceof Error ? err : new Error(String(err)));
+    }
+  }, [
+    ttsConfig,
+    audioParams,
+    autoPlay,
+    stop,
+    instanceId,
+    onPlayStart,
+    onPlayEnd,
+    initAudioContext,
+    pause,
+    metricsCollector,
+    maxSegmentLength,
+    sendNextSegment,
+    onError
+  ]);
+  const onMessage = (0, import_react4.useCallback)((chunk) => { // feeds one streamed text chunk into the hook
+    if (!chunk) return;
+    streamTextRef.current += chunk;
+    setStreamText(streamTextRef.current);
+    if (!calledSessionStartedRef.current && !isSessionStartedRef.current && clientRef.current && isConnectedRef.current) { // NOTE(review): chunks arriving before onStart do not start a session; only a later chunk would - confirm
+      console.log("[useStreamTTS] First text received, starting session...");
+      calledSessionStartedRef.current = true;
+      clientRef.current.startSession();
+    }
+    splitterRef.current?.onChunk(chunk);
+  }, []);
+  const finishStream = (0, import_react4.useCallback)(async () => { // marks the text stream finished and flushes the splitter
+    isStreamFinishedRef.current = true;
+    splitterRef.current?.complete();
+    console.log(`[useStreamTTS] Stream finished, ${segmentQueueRef.current.length} segments remaining in queue`);
+    if (segmentQueueRef.current.length > 0 || isSendingRef.current) {
+      await new Promise((resolve) => { // NOTE(review): resolved only from sendNextSegment; if the session never starts this appears to wait indefinitely - confirm
+        resolveAllSegmentsSentRef.current = resolve;
+      });
+    } else if (clientRef.current && isSessionStartedRef.current && !isSessionFinishedRef.current) { // nothing left to send: finish the session right away
+      isSessionFinishedRef.current = true;
+      clientRef.current.finishSession();
+    }
+  }, []);
+  const seek = (0, import_react4.useCallback)((percentage) => { // percentage is divided by 100 and multiplied by the duration to get the target time
+    if (audioRef.current) {
+      let duration = audioRef.current.duration;
+      if (!isFinite(duration) && audioRef.current.buffered.length > 0) {
+        duration = audioRef.current.buffered.end(audioRef.current.buffered.length - 1);
+      }
+      if (isFinite(duration) && duration > 0) {
+        const time = percentage / 100 * duration;
+        if (isFinite(time)) {
+          audioRef.current.currentTime = time;
+          setProgress(percentage);
+        }
+      }
+    }
+  }, []);
+  const getFrequencyData = (0, import_react4.useCallback)(() => { // returns an empty array until the analyser exists
+    if (!analyserRef.current) return new Uint8Array(0);
+    const dataArray = new Uint8Array(analyserRef.current.frequencyBinCount);
+    analyserRef.current.getByteFrequencyData(dataArray);
+    return dataArray;
+  }, []);
+  const getTimeDomainData = (0, import_react4.useCallback)(() => { // same array length as getFrequencyData (frequencyBinCount)
+    if (!analyserRef.current) return new Uint8Array(0);
+    const dataArray = new Uint8Array(analyserRef.current.frequencyBinCount);
+    analyserRef.current.getByteTimeDomainData(dataArray);
+    return dataArray;
+  }, []);
+  (0, import_react4.useEffect)(() => { // while playing and visualization is enabled, refresh visualizationData from an animation-frame loop
+    if (!visualization?.enabled) return;
+    let animId;
+    let lastUpdate = 0;
+    const interval = visualization.refreshInterval || 0; // 0 means the data is refreshed on every animation frame
+    const update = (timestamp) => {
+      if (isPlaying && !isPaused) {
+        if (timestamp - lastUpdate >= interval) {
+          setVisualizationData({
+            frequencyData: getFrequencyData(),
+            timeDomainData: getTimeDomainData()
+          });
+          lastUpdate = timestamp;
+        }
+        animId = requestAnimationFrame(update);
+      }
+    };
+    if (isPlaying && !isPaused) {
+      animId = requestAnimationFrame(update);
+    }
+    return () => {
+      if (animId) cancelAnimationFrame(animId);
+    };
+  }, [isPlaying, isPaused, visualization, getFrequencyData, getTimeDomainData]);
+  (0, import_react4.useEffect)(() => { // cleanup: stop everything and close the AudioContext
+    return () => {
+      stop();
+      if (audioContextRef.current) {
+        audioContextRef.current.close();
+      }
+    };
+  }, [stop]);
+  return {
+    isConnected,
+    isSessionStarted,
+    isSynthesizing,
+    isPlaying,
+    isPaused,
+    error,
+    streamText,
+    progress,
+    connect,
+    onMessage,
+    finishStream,
+    pause,
+    resume,
+    stop,
+    seek,
+    getFrequencyData,
+    getTimeDomainData,
+    visualizationData
+  };
+}
+function getSessionAudioCache(instanceId) { // Looks up the session cache entry stored under this instance id.
+  return sessionAudioCache.get(instanceId); // undefined when nothing is stored for the id
+}
+function clearSessionAudioCache(instanceId) { // Removes the session cache entry stored under this instance id, if any.
+  sessionAudioCache.delete(instanceId);
+}
+function findSessionCacheByText(streamText, voice, speed) { // Scans all session cache entries for an exact match on text, voice and speed.
+  for (const entry of sessionAudioCache.values()) {
+    if (entry.streamText === streamText && entry.voice === voice && entry.speed === speed) { // strict equality on all three fields
+      return entry; // first match in the Map's iteration order
+    }
+  }
+  return void 0; // no matching entry
+}
+// src/components/AudioWaveVisualizer.tsx
+var import_react5 = require("react");
 var import_jsx_runtime = require("react/jsx-runtime");
 var AudioWaveVisualizer = ({
   isPlaying,
@@ -1041,8 +1787,8 @@ var AudioWaveVisualizer = ({
   className,
   styleObj
 }) => {
-  const canvasRef = (0, import_react4.useRef)(null);
-  const requestRef = (0, import_react4.useRef)(null);
+  const canvasRef = (0, import_react5.useRef)(null);
+  const requestRef = (0, import_react5.useRef)(null);
   const progressBackground = Array.isArray(color) ? `linear-gradient(90deg, ${color[0]}, ${color[1]})` : color;
   const textColor = Array.isArray(color) ? color[0] : color;
   const draw = () => {
@@ -1107,7 +1853,7 @@ var AudioWaveVisualizer = ({
       requestRef.current = requestAnimationFrame(draw);
     }
   };
-  (0, import_react4.useEffect)(() => {
+  (0, import_react5.useEffect)(() => {
     if (isPlaying && !isPaused) {
       requestRef.current = requestAnimationFrame(draw);
     } else {
@@ -1151,7 +1897,7 @@ var AudioWaveVisualizer = ({
 var AudioWaveVisualizer_default = AudioWaveVisualizer;
 // src/components/AudioProgressBar.tsx
-var import_react5 = require("react");
+var import_react6 = require("react");
 var import_jsx_runtime2 = require("react/jsx-runtime");
 var AudioProgressBar = ({
   progress,
@@ -1164,16 +1910,16 @@ var AudioProgressBar = ({
   className,
   style
 }) => {
-  const progressBarRef = (0, import_react5.useRef)(null);
-  const containerRef = (0, import_react5.useRef)(null);
-  const progressTextRef = (0, import_react5.useRef)(null);
-  const thumbRef = (0, import_react5.useRef)(null);
-  const displayedProgress = (0, import_react5.useRef)(0);
-  const requestRef = (0, import_react5.useRef)(null);
-  const isDragging = (0, import_react5.useRef)(false);
-  const isHovering = (0, import_react5.useRef)(false);
-  const isTouch = (0, import_react5.useRef)(false);
-  (0, import_react5.useEffect)(() => {
+  const progressBarRef = (0, import_react6.useRef)(null);
+  const containerRef = (0, import_react6.useRef)(null);
+  const progressTextRef = (0, import_react6.useRef)(null);
+  const thumbRef = (0, import_react6.useRef)(null);
+  const displayedProgress = (0, import_react6.useRef)(0);
+  const requestRef = (0, import_react6.useRef)(null);
+  const isDragging = (0, import_react6.useRef)(false);
+  const isHovering = (0, import_react6.useRef)(false);
+  const isTouch = (0, import_react6.useRef)(false);
+  (0, import_react6.useEffect)(() => {
     const match = window.matchMedia("(pointer: coarse)");
     isTouch.current = match.matches;
     if (isTouch.current && thumbRef.current) {
@@ -1212,7 +1958,7 @@ var AudioProgressBar = ({
       requestRef.current = requestAnimationFrame(animate);
     }
   };
-  (0, import_react5.useEffect)(() => {
+  (0, import_react6.useEffect)(() => {
     requestRef.current = requestAnimationFrame(animate);
     return () => {
       if (requestRef.current) {
@@ -1399,8 +2145,13 @@ var AudioProgressBar_default = AudioProgressBar;
 0 && (module.exports = {
   AudioProgressBar,
   AudioWaveVisualizer,
+  StreamingTextSplitter,
+  clearSessionAudioCache,
+  findSessionCacheByText,
+  getSessionAudioCache,
   splitTextByDelimiters,
   useMessageTTS,
+  useStreamTTS,
   useVolcanoASR,
   useVolcanoTTS
 });