npm - @amaster.ai/tts-client - Versions diffs - 1.1.9 → 1.1.10 - Mend

@amaster.ai/tts-client 1.1.9 → 1.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs CHANGED Viewed

@@ -20,184 +20,1013 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
-  createTTSClient: () => tts_client_default
+  createTTSClient: () => createTTSClient,
+  createTTSSpeakController: () => createTTSSpeakController,
+  preprocessTTSContent: () => preprocessTTSContent,
+  splitTextIntoFragments: () => splitTextIntoFragments
 });
 module.exports = __toCommonJS(index_exports);
 // src/tts-client.ts
 var TTS_PATH = "/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime";
-function createTTSClient(config) {
+// Default `maxLength` for splitTextIntoFragments (2e3 === 2000), measured in the
+// weighted units produced by getWeightedTextLength.
+var TTS_MAX_FRAGMENT_LENGTH = 2e3;
+/**
+ * Reports whether a global `window` binding is defined.
+ *
+ * @returns {boolean} `true` when `typeof window` is not `"undefined"`.
+ */
+function isBrowserEnvironment() {
+  const windowType = typeof window;
+  return windowType !== "undefined";
+}
+/**
+ * Computes a weighted length for `text`.
+ *
+ * Characters in U+3400–U+4DBF, U+4E00–U+9FFF and U+F900–U+FAFF count as 2;
+ * every other code point counts as 1. Iteration is per code point, so a
+ * surrogate pair contributes a single unit.
+ *
+ * @param {string} text - Text to measure.
+ * @returns {number} Sum of the per-character weights.
+ */
+function getWeightedTextLength(text) {
+  const doubleWidthPattern = /[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]/;
+  return [...text].reduce(
+    (total, char) => total + (doubleWidthPattern.test(char) ? 2 : 1),
+    0
+  );
+}
+/**
+ * Hard-splits `segment` character by character into pieces whose weighted
+ * length (per getWeightedTextLength) does not exceed `maxLength`.
+ *
+ * A piece always receives at least one character, so a single character that
+ * is heavier than `maxLength` still forms its own piece.
+ *
+ * @param {string} segment - Text to split.
+ * @param {number} maxLength - Weighted-length budget per piece.
+ * @returns {string[]} The pieces, in order; empty when `segment` is empty.
+ */
+function splitOversizedSegment(segment, maxLength) {
+  const pieces = [];
+  let buffer = "";
+  let bufferWeight = 0;
+  for (const symbol of segment) {
+    const weight = getWeightedTextLength(symbol);
+    const overflows = buffer !== "" && bufferWeight + weight > maxLength;
+    if (overflows) {
+      // Close the piece in progress and start a fresh one with this symbol.
+      pieces.push(buffer);
+      buffer = "";
+      bufferWeight = 0;
+    }
+    buffer += symbol;
+    bufferWeight += weight;
+  }
+  if (buffer !== "") {
+    pieces.push(buffer);
+  }
+  return pieces;
+}
+/**
+ * Splits `text` into trimmed, non-empty fragments whose weighted length
+ * (per getWeightedTextLength) stays within `maxLength`.
+ *
+ * The text is first cut into newline-terminated segments, then each segment is
+ * cut at soft break points (after sentence punctuation, after commas/colons,
+ * after whitespace). Parts are packed greedily into the current fragment; a
+ * single part that is itself larger than `maxLength` is hard-split with
+ * splitOversizedSegment.
+ *
+ * @param {string} text - Text to split.
+ * @param {number} [maxLength=TTS_MAX_FRAGMENT_LENGTH] - Weighted-length budget per fragment.
+ * @returns {string[]} Fragments in source order; empty when `text` has no non-whitespace content.
+ */
+function splitTextIntoFragments(text, maxLength = TTS_MAX_FRAGMENT_LENGTH) {
+  const fragments = [];
+  const segments = text.match(/.+?(?:\r?\n+|$)/gs) ?? [];
+  const softBreakPattern = /(?<=[。！？；.!?;])|(?<=[，、,:：])|(?<=\s)/;
+  let current = "";
+  const pushFragment = (fragment) => {
+    const trimmed = fragment.trim();
+    if (trimmed) {
+      fragments.push(trimmed);
+    }
+  };
+  const appendPart = (part) => {
+    if (!part.trim()) {
+      return;
+    }
+    if (getWeightedTextLength(part) > maxLength) {
+      // Flush the pending fragment first: previously it was overwritten with ""
+      // here, which silently dropped any text accumulated before an oversized part.
+      pushFragment(current);
+      current = "";
+      for (const fragment of splitOversizedSegment(part, maxLength)) {
+        pushFragment(fragment);
+      }
+      return;
+    }
+    const next = current ? `${current}${part}` : part;
+    if (getWeightedTextLength(next) <= maxLength) {
+      current = next;
+      return;
+    }
+    // The part does not fit any more: close the current fragment and start a new one.
+    pushFragment(current);
+    current = part;
+  };
+  for (const segment of segments) {
+    const parts = segment.split(softBreakPattern).filter((part) => part.trim());
+    for (const part of parts) {
+      appendPart(part);
+    }
+  }
+  pushFragment(current);
+  return fragments;
+}
+/**
+ * Normalizes line endings and collapses excess whitespace.
+ *
+ * Converts CRLF to LF, caps runs of three or more newlines at two, and
+ * collapses runs of two or more spaces/tabs into a single space.
+ *
+ * @param {string} text - Input text.
+ * @returns {string} The normalized text.
+ */
+function normalizeWhitespace(text) {
+  const unixNewlines = text.replace(/\r\n/g, "\n");
+  const cappedBlankLines = unixNewlines.replace(/\n{3,}/g, "\n\n");
+  return cappedBlankLines.replace(/[ \t]{2,}/g, " ");
+}
+/**
+ * Flattens Markdown table rows into comma-separated text.
+ *
+ * Every line that starts and ends with `|` is replaced by its trimmed,
+ * non-empty cells joined with a fullwidth comma (U+FF0C).
+ *
+ * @param {string} text - Input text.
+ * @returns {string} Text with table rows flattened.
+ */
+function stripMarkdownTables(text) {
+  const tableRowPattern = /^\|(.+)\|$/gm;
+  const flattenRow = (_match, row) => {
+    const cells = [];
+    for (const rawCell of row.split("|")) {
+      const cell = rawCell.trim();
+      if (cell) {
+        cells.push(cell);
+      }
+    }
+    return cells.join("\uFF0C");
+  };
+  return text.replace(tableRowPattern, flattenRow);
+}
+/**
+ * Removes common Markdown syntax so only speakable text remains.
+ *
+ * The rules run in order: fenced code blocks become a space; inline code,
+ * images and links keep only their text/alt part; heading, blockquote and
+ * list markers at line start are removed; finally any remaining runs of
+ * `*`, `_`, `~`, `#` or `>` are deleted.
+ *
+ * @param {string} text - Input text.
+ * @returns {string} Text with Markdown markup stripped.
+ */
+function stripMarkdown(text) {
+  const rules = [
+    [/```[\s\S]*?```/g, " "],
+    [/`([^`]+)`/g, "$1"],
+    [/!\[([^\]]*)\]\([^)]+\)/g, "$1"],
+    [/\[([^\]]+)\]\(([^)]+)\)/g, "$1"],
+    [/^#{1,6}\s+/gm, ""],
+    [/^\s*>\s?/gm, ""],
+    [/^\s*[-*+]\s+/gm, ""],
+    [/^\s*\d+\.\s+/gm, ""],
+    [/[*_~#>]+/g, ""]
+  ];
+  let stripped = text;
+  for (const [pattern, replacement] of rules) {
+    stripped = stripped.replace(pattern, replacement);
+  }
+  return stripped;
+}
+/**
+ * Replaces every `http://` or `https://` URL (matched case-insensitively, up
+ * to the next whitespace) with a single space.
+ *
+ * @param {string} text - Input text.
+ * @returns {string} Text without URLs.
+ */
+function stripUrls(text) {
+  const urlPattern = /https?:\/\/\S+/gi;
+  const withoutUrls = text.replace(urlPattern, " ");
+  return withoutUrls;
+}
+/**
+ * Replaces each code point in U+1F000–U+1FAFF and U+2600–U+27BF with a space.
+ *
+ * @param {string} text - Input text.
+ * @returns {string} Text with those code points blanked out.
+ */
+function stripEmojiAndSymbols(text) {
+  const symbolPattern = /[\u{1F000}-\u{1FAFF}\u{2600}-\u{27BF}]/gu;
+  const withoutSymbols = text.replace(symbolPattern, " ");
+  return withoutSymbols;
+}
+/**
+ * Cleans arbitrary (possibly Markdown) text before it is sent for synthesis.
+ *
+ * Runs, in order: whitespace normalization, table flattening, Markdown
+ * stripping, URL removal and emoji/symbol removal. Afterwards any remaining
+ * `|` becomes a fullwidth comma, trailing spaces/tabs before a newline are
+ * dropped, newline runs collapse to one, space runs collapse to one, and the
+ * result is trimmed.
+ *
+ * @param {string} text - Raw input text.
+ * @returns {string} The cleaned text (possibly empty).
+ */
+function preprocessTTSContent(text) {
+  const stages = [
+    normalizeWhitespace,
+    stripMarkdownTables,
+    stripMarkdown,
+    stripUrls,
+    stripEmojiAndSymbols
+  ];
+  let cleaned = text;
+  for (const stage of stages) {
+    cleaned = stage(cleaned);
+  }
+  cleaned = cleaned.replace(/[|]/g, "\uFF0C");
+  cleaned = cleaned.replace(/[ \t]+\n/g, "\n");
+  cleaned = cleaned.replace(/\n+/g, "\n");
+  cleaned = cleaned.replace(/[ ]{2,}/g, " ");
+  return cleaned.trim();
+}
+/**
+ * Decodes a base64 string into its raw bytes.
+ *
+ * @param {string} chunk - Base64-encoded data.
+ * @returns {Uint8Array} One byte per character of the `atob` result.
+ */
+function decodeBase64Chunk(chunk) {
+  const decoded = atob(chunk);
+  // atob yields one character per byte, so each char code is the byte value.
+  return Uint8Array.from(decoded, (char) => char.charCodeAt(0));
+}
+/**
+ * Concatenates byte arrays into one newly allocated `Uint8Array`.
+ *
+ * @param {Uint8Array[]} items - Arrays to join, in order.
+ * @returns {Uint8Array} A new array holding all bytes back to back.
+ */
+function concatUint8Arrays(items) {
+  const combinedLength = items.reduce((sum, item) => sum + item.length, 0);
+  const combined = new Uint8Array(combinedLength);
+  let writeAt = 0;
+  for (const item of items) {
+    combined.set(item, writeAt);
+    writeAt += item.length;
+  }
+  return combined;
+}
+/**
+ * Prepends a 44-byte RIFF/WAVE header to raw PCM bytes.
+ *
+ * The header declares PCM format (tag 1), one channel, 16 bits per sample and
+ * a block alignment of 2 bytes; all multi-byte fields are little-endian.
+ *
+ * @param {Uint8Array} pcmBytes - Raw PCM payload.
+ * @param {number} sampleRate - Sample rate written into the header.
+ * @returns {Uint8Array} Header followed by the payload.
+ */
+function pcmToWav(pcmBytes, sampleRate) {
+  const channelCount = 1;
+  const bitsPerSample = 16;
+  const blockAlign = 2;
+  const payloadSize = pcmBytes.byteLength;
+  const headerBytes = new Uint8Array(44);
+  const view = new DataView(headerBytes.buffer);
+  const putTag = (offset, tag) => {
+    [...tag].forEach((char, position) => {
+      view.setUint8(offset + position, char.charCodeAt(0));
+    });
+  };
+  putTag(0, "RIFF");
+  view.setUint32(4, 36 + payloadSize, true);
+  putTag(8, "WAVE");
+  putTag(12, "fmt ");
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, channelCount, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, sampleRate * blockAlign, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  putTag(36, "data");
+  view.setUint32(40, payloadSize, true);
+  return concatUint8Arrays([headerBytes, pcmBytes]);
+}
+/**
+ * Encodes bytes as a base64 `data:` URI.
+ *
+ * @param {Uint8Array} bytes - Bytes to encode.
+ * @param {string} mimeType - MIME type placed in the URI prefix.
+ * @returns {string} `data:<mimeType>;base64,<payload>`.
+ */
+function bytesToDataUri(bytes, mimeType) {
+  const chars = [];
+  for (const byte of bytes) {
+    chars.push(String.fromCharCode(byte));
+  }
+  const payload = btoa(chars.join(""));
+  return `data:${mimeType};base64,${payload}`;
+}
+/**
+ * Creates the Web Audio playback backend.
+ *
+ * Audio arrives as base64 chunks that are decoded as 16-bit little-endian PCM
+ * into a single-channel AudioBuffer. Two modes are offered: `playBuffered`
+ * plays a complete list of chunks at once, while `scheduleStreamingChunk`
+ * queues chunks back to back as they arrive and `finalizeStreaming` marks the
+ * end of the stream.
+ *
+ * @param {object} config
+ * @param {number} config.sampleRate - Sample rate for the AudioContext and buffers.
+ * @param {Function} [config.onAudioStart] - Called when playback starts.
+ * @param {Function} [config.onAudioEnd] - Called when playback has fully finished.
+ * @param {Function} [config.onError] - Called with an Error for unsupported formats.
+ * @returns {object} Backend with `kind`, `hasStreamingPlayback`, `isPlaying`, `stop`,
+ *   `playBuffered`, `scheduleStreamingChunk`, `finalizeStreaming` and `close`.
+ */
+function createBrowserPlaybackBackend(config) {
+  const { sampleRate, onAudioStart, onAudioEnd, onError } = config;
+  // One 16-bit sample; anything shorter cannot form a valid (length >= 1) AudioBuffer.
+  const bytesPerSample = 2;
+  let audioContext = null;
+  const audioSources = /* @__PURE__ */ new Set();
+  let nextScheduleTime = 0;
+  let streamEndHandled = false;
+  let streamingStarted = false;
+  const ensureAudioContext = () => {
+    if (!audioContext && typeof AudioContext !== "undefined") {
+      audioContext = new AudioContext({ sampleRate });
+    }
+    return audioContext;
+  };
+  const createAudioBufferFromPCM = (bytes) => {
+    const numSamples = Math.floor(bytes.length / bytesPerSample);
+    const buffer = new AudioBuffer({ length: numSamples, sampleRate });
+    const channelData = buffer.getChannelData(0);
+    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
+    for (let index = 0; index < numSamples; index += 1) {
+      const int16 = dataView.getInt16(index * bytesPerSample, true);
+      // Scale signed 16-bit samples into the [-1, 1) float range.
+      channelData[index] = int16 / 32768;
+    }
+    return buffer;
+  };
+  const stop = () => {
+    for (const source of audioSources) {
+      try {
+        // Detach the handler first so stopping does not trigger onAudioEnd.
+        source.onended = null;
+        source.stop();
+      } catch {
+        // stop() throws when the source was never started; nothing to undo then.
+      }
+      source.disconnect();
+    }
+    audioSources.clear();
+    nextScheduleTime = 0;
+    streamEndHandled = false;
+    streamingStarted = false;
+  };
+  return {
+    kind: "browser",
+    hasStreamingPlayback: true,
+    isPlaying() {
+      return audioSources.size > 0;
+    },
+    stop,
+    async playBuffered(input) {
+      if (input.audioFormat !== "pcm") {
+        onError?.(new Error(`Built-in playback only supports pcm, got ${input.audioFormat}`));
+        return;
+      }
+      const context = ensureAudioContext();
+      if (!context || !input.chunks.length) {
+        return;
+      }
+      stop();
+      const chunkBytes = input.chunks.map((chunk) => decodeBase64Chunk(chunk));
+      const combined = concatUint8Arrays(chunkBytes);
+      if (combined.length < bytesPerSample) {
+        // No complete sample: constructing a zero-length AudioBuffer would throw.
+        return;
+      }
+      const buffer = createAudioBufferFromPCM(combined);
+      const source = context.createBufferSource();
+      if (context.state === "suspended") {
+        await context.resume();
+      }
+      source.buffer = buffer;
+      source.connect(context.destination);
+      audioSources.add(source);
+      source.onended = () => {
+        audioSources.delete(source);
+        source.disconnect();
+        onAudioEnd?.();
+      };
+      onAudioStart?.();
+      source.start(0);
+    },
+    async scheduleStreamingChunk(input) {
+      if (input.audioFormat !== "pcm") {
+        onError?.(new Error(`Built-in playback only supports pcm, got ${input.audioFormat}`));
+        return;
+      }
+      const context = ensureAudioContext();
+      if (!context) {
+        return;
+      }
+      if (context.state === "suspended") {
+        await context.resume();
+      }
+      const chunkBytes = decodeBase64Chunk(input.chunk);
+      if (chunkBytes.length < bytesPerSample) {
+        // Empty delta: nothing to play, and a zero-length AudioBuffer would throw.
+        return;
+      }
+      const chunkBuffer = createAudioBufferFromPCM(chunkBytes);
+      const source = context.createBufferSource();
+      const leadTime = 0.05;
+      if (!nextScheduleTime || nextScheduleTime < context.currentTime) {
+        // First chunk, or the queue ran dry and the schedule fell behind the
+        // clock. Re-anchor it; otherwise this and every following chunk would
+        // start immediately and play on top of each other.
+        nextScheduleTime = context.currentTime + leadTime;
+      }
+      source.buffer = chunkBuffer;
+      source.connect(context.destination);
+      audioSources.add(source);
+      source.onended = () => {
+        audioSources.delete(source);
+        source.disconnect();
+        // Only report the end once the stream was finalized and the queue drained.
+        if (streamEndHandled && audioSources.size === 0) {
+          nextScheduleTime = 0;
+          streamEndHandled = false;
+          streamingStarted = false;
+          onAudioEnd?.();
+        }
+      };
+      if (!streamingStarted) {
+        streamingStarted = true;
+        onAudioStart?.();
+      }
+      source.start(nextScheduleTime);
+      nextScheduleTime += chunkBuffer.duration;
+    },
+    finalizeStreaming() {
+      streamEndHandled = true;
+      // Everything already finished playing: report the end right away.
+      if (audioSources.size === 0 && streamingStarted) {
+        nextScheduleTime = 0;
+        streamEndHandled = false;
+        streamingStarted = false;
+        onAudioEnd?.();
+      }
+    },
+    close() {
+      stop();
+      if (audioContext) {
+        // Best effort: a failure to close the context is deliberately ignored.
+        void audioContext.close().catch(() => {
+        });
+        audioContext = null;
+      }
+    }
+  };
+}
+/**
+ * Creates the playback backend for a Taro mini-program runtime.
+ *
+ * PCM chunks are wrapped into a WAV file (see pcmToWav), written to a temp
+ * file under `Taro.env.USER_DATA_PATH` when a file system manager is
+ * available (otherwise passed as a data URI), and played through an inner
+ * audio context. Streaming playback is not supported by this backend.
+ *
+ * @param {object} config
+ * @param {object} [config.runtime] - Object exposing the `Taro` API.
+ * @param {Function} [config.onAudioStart] - Called from the audio context's play event.
+ * @param {Function} [config.onAudioEnd] - Called from the audio context's ended event.
+ * @param {Function} [config.onError] - Called with an Error on playback or file-write failure.
+ * @returns {object} Backend with `kind`, `hasStreamingPlayback`, `isPlaying`, `stop`,
+ *   `playBuffered` and `close`.
+ */
+function createMiniProgramPlaybackBackend(config) {
+  const { runtime, onAudioStart, onAudioEnd, onError } = config;
+  const taro = runtime?.Taro;
+  const createInnerAudioContext = taro?.createInnerAudioContext;
+  const getFileSystemManager = taro?.getFileSystemManager;
+  const userDataPath = taro?.env?.USER_DATA_PATH;
+  let innerAudio = createInnerAudioContext?.() ?? null;
+  let currentTempFile = null;
+  let playing = false;
+  const bindEvents = () => {
+    innerAudio?.onPlay?.(() => {
+      playing = true;
+      onAudioStart?.();
+    });
+    innerAudio?.onEnded?.(() => {
+      playing = false;
+      onAudioEnd?.();
+    });
+    innerAudio?.onStop?.(() => {
+      playing = false;
+    });
+    innerAudio?.onError?.((error) => {
+      playing = false;
+      onError?.(new Error(error?.errMsg || "Mini-program audio playback failed"));
+    });
+  };
+  bindEvents();
+  const cleanupTempFile = () => {
+    if (!currentTempFile) {
+      return;
+    }
+    const pathToDelete = currentTempFile;
+    currentTempFile = null;
+    // Best effort: a failed unlink is deliberately ignored.
+    getFileSystemManager?.()?.unlink?.({
+      filePath: pathToDelete,
+      fail: () => {
+      }
+    });
+  };
+  const writeTempAudioFile = async (bytes) => {
+    if (!userDataPath || !getFileSystemManager) {
+      return bytesToDataUri(bytes, "audio/wav");
+    }
+    const filePath = `${userDataPath}/amaster-tts-${Date.now()}-${Math.random().toString(16).slice(2)}.wav`;
+    const fsManager = getFileSystemManager();
+    if (!fsManager?.writeFile) {
+      return bytesToDataUri(bytes, "audio/wav");
+    }
+    await new Promise((resolve, reject) => {
+      fsManager.writeFile?.({
+        filePath,
+        // Copy exactly the view's bytes; the backing buffer may be larger.
+        data: bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength),
+        success: () => resolve(),
+        fail: (error) => reject(error)
+      });
+    });
+    // Remove the previous temp file only after the new one was written.
+    cleanupTempFile();
+    currentTempFile = filePath;
+    return filePath;
+  };
+  return {
+    kind: "mini-program",
+    hasStreamingPlayback: false,
+    isPlaying() {
+      return playing;
+    },
+    stop() {
+      innerAudio?.stop?.();
+      playing = false;
+    },
+    async playBuffered(input) {
+      if (input.audioFormat !== "pcm") {
+        onError?.(new Error(`Mini-program built-in playback only supports pcm, got ${input.audioFormat}`));
+        return;
+      }
+      const pcmChunks = input.chunks.map((chunk) => decodeBase64Chunk(chunk));
+      const wavBytes = pcmToWav(concatUint8Arrays(pcmChunks), input.sampleRate);
+      let source;
+      try {
+        source = await writeTempAudioFile(wavBytes);
+      } catch (error) {
+        // Report write failures through onError instead of rejecting: callers
+        // that fire-and-forget playBuffered would otherwise leak an unhandled
+        // rejection. NOTE(review): the failure payload is presumably a plain
+        // `{ errMsg }` object like the audio error above - confirm against Taro.
+        const writeError = error instanceof Error ? error : new Error(error?.errMsg || "Failed to write mini-program audio file");
+        onError?.(writeError);
+        return;
+      }
+      if (!innerAudio && createInnerAudioContext) {
+        // The context was destroyed by close(); recreate it on demand.
+        innerAudio = createInnerAudioContext();
+        bindEvents();
+      }
+      if (!innerAudio?.play) {
+        onError?.(new Error("Mini-program audio context is unavailable"));
+        return;
+      }
+      innerAudio.src = source;
+      innerAudio.autoplay = false;
+      innerAudio.obeyMuteSwitch = false;
+      innerAudio.play();
+    },
+    close() {
+      innerAudio?.stop?.();
+      innerAudio?.destroy?.();
+      innerAudio = null;
+      playing = false;
+      cleanupTempFile();
+    }
+  };
+}
+/**
+ * Picks a playback backend for the current environment.
+ *
+ * Prefers the mini-program backend when the runtime exposes
+ * `Taro.createInnerAudioContext` and no `window` exists; otherwise uses the
+ * browser backend when `AudioContext` is defined.
+ *
+ * @param {object} config - Forwarded unchanged to the chosen backend factory.
+ * @returns {object|null} The backend, or `null` when neither is available.
+ */
+function resolvePlaybackBackend(config) {
+  const hasTaroAudio = Boolean(config.runtime?.Taro?.createInnerAudioContext);
+  if (hasTaroAudio && !isBrowserEnvironment()) {
+    return createMiniProgramPlaybackBackend(config);
+  }
+  const hasWebAudio = typeof AudioContext !== "undefined";
+  return hasWebAudio ? createBrowserPlaybackBackend(config) : null;
+}
+function createRawTTSClient(config) {
   const {
     getAccessToken,
     voice = "Cherry",
     autoPlay = true,
     audioFormat = "pcm",
     sampleRate = 24e3,
+    runtime,
     onReady,
     onAudioStart,
     onAudioEnd,
     onAudioChunk,
-    onError
+    onError,
+    onClose
   } = config;
+  void getAccessToken;
   let ws = null;
+  let connected = false;
   let audioChunks = [];
-  let audioContext = null;
-  let audioSource = null;
+  let responseDone = false;
+  let autoPlayed = false;
+  let playbackSuppressed = false;
+  let playbackBackend = resolvePlaybackBackend({
+    runtime,
+    sampleRate,
+    onAudioStart,
+    onAudioEnd,
+    onError
+  });
+  function buildWsUrl() {
+    let path = TTS_PATH;
+    return path;
+  }
+  function play() {
+    playbackSuppressed = false;
+    if (!audioChunks.length || !playbackBackend) {
+      return;
+    }
+    if (!responseDone && playbackBackend.hasStreamingPlayback) {
+      return;
+    }
+    void playbackBackend.playBuffered({
+      chunks: [...audioChunks],
+      sampleRate,
+      audioFormat
+    });
+  }
+  function stop() {
+    playbackSuppressed = true;
+    playbackBackend?.stop();
+  }
+  function close() {
+    stop();
+    if (ws) {
+      ws.close();
+      ws = null;
+    }
+    connected = false;
+    playbackBackend?.close();
+    playbackBackend = resolvePlaybackBackend({
+      runtime,
+      sampleRate,
+      onAudioStart,
+      onAudioEnd,
+      onError
+    });
+    onClose?.();
+  }
   async function connect() {
-    return new Promise((resolve, reject) => {
-      let wsUrl = TTS_PATH;
-      if (getAccessToken) {
-        const token = getAccessToken();
-        if (token) {
-          const separator = wsUrl.includes("?") ? "&" : "?";
-          wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;
+    if (connected && ws?.readyState === WebSocket.OPEN) {
+      return;
+    }
+    await new Promise((resolve, reject) => {
+      const socket = new WebSocket(buildWsUrl());
+      ws = socket;
+      let settled = false;
+      const settleResolve = () => {
+        if (settled) {
+          return;
         }
-      }
-      ws = new WebSocket(wsUrl);
-      ws.onopen = () => {
+        settled = true;
+        connected = true;
+        resolve();
       };
-      ws.onmessage = (event) => {
-        const data = JSON.parse(event.data);
-        if (data.type === "session.created") {
-          ws.send(
-            JSON.stringify({
-              type: "session.update",
-              session: {
-                mode: "server_commit",
-                voice,
-                response_format: audioFormat,
-                sample_rate: sampleRate
-              }
-            })
-          );
+      const settleReject = (error) => {
+        if (settled) {
+          return;
         }
-        if (data.type === "session.updated") {
-          onReady?.();
-          resolve();
-        }
-        if (data.type === "response.audio.delta") {
-          audioChunks.push(data.delta);
-          onAudioChunk?.(audioChunks);
-        }
-        if (data.type === "response.audio.done") {
-          onAudioChunk?.(audioChunks);
-          if (autoPlay && typeof window !== "undefined") {
-            playAudio();
+        settled = true;
+        connected = false;
+        reject(error);
+      };
+      socket.onmessage = async (event) => {
+        try {
+          const data = JSON.parse(event.data);
+          if (data.type === "session.created") {
+            socket.send(
+              JSON.stringify({
+                type: "session.update",
+                session: {
+                  mode: "server_commit",
+                  voice,
+                  response_format: audioFormat,
+                  sample_rate: sampleRate
+                }
+              })
+            );
           }
-        }
-        if (data.type === "response.done") {
-          ws.send(JSON.stringify({ type: "session.finish" }));
-        }
-        if (data.type === "error") {
-          const err = new Error(data.error?.message || "Unknown error");
-          onError?.(err);
-          reject(err);
+          if (data.type === "session.updated") {
+            onReady?.();
+            settleResolve();
+            return;
+          }
+          if (data.type === "response.audio.delta") {
+            audioChunks.push(data.delta);
+            onAudioChunk?.([...audioChunks]);
+            if (autoPlay && !playbackSuppressed && playbackBackend?.hasStreamingPlayback) {
+              autoPlayed = true;
+              await playbackBackend.scheduleStreamingChunk?.({
+                chunk: data.delta,
+                sampleRate,
+                audioFormat
+              });
+            }
+            return;
+          }
+          if (data.type === "response.audio.done") {
+            responseDone = true;
+            onAudioChunk?.([...audioChunks]);
+            if (playbackBackend?.hasStreamingPlayback) {
+              playbackBackend.finalizeStreaming?.();
+              return;
+            }
+            if (autoPlay && !playbackSuppressed && !autoPlayed) {
+              autoPlayed = true;
+              await playbackBackend?.playBuffered({
+                chunks: [...audioChunks],
+                sampleRate,
+                audioFormat
+              });
+            }
+            return;
+          }
+          if (data.type === "error") {
+            const error = new Error(data.error?.message || "Unknown TTS error");
+            onError?.(error);
+            settleReject(error);
+          }
+        } catch (error) {
+          const parsedError = error instanceof Error ? error : new Error(String(error));
+          onError?.(parsedError);
+          settleReject(parsedError);
         }
       };
-      ws.onerror = () => {
-        const err = new Error("WebSocket connection error");
-        onError?.(err);
-        reject(err);
+      socket.onerror = () => {
+        const error = new Error("WebSocket connection error");
+        onError?.(error);
+        settleReject(error);
       };
-      ws.onclose = () => {
+      socket.onclose = () => {
+        connected = false;
         ws = null;
       };
     });
   }
-  async function speak(text) {
+  function resetSynthesisState() {
+    stop();
+    audioChunks = [];
+    responseDone = false;
+    autoPlayed = false;
+    playbackSuppressed = false;
+  }
+  function ensureSocketReady() {
     if (!ws || ws.readyState !== WebSocket.OPEN) {
       throw new Error("WebSocket not connected");
     }
-    audioChunks = [];
-    ws.send(
-      JSON.stringify({
-        type: "input_text_buffer.append",
-        text
-      })
-    );
-    setTimeout(() => {
-      ws.send(
+  }
+  function appendText(text) {
+    const normalizedText = preprocessTTSContent(text);
+    const fragments = splitTextIntoFragments(normalizedText);
+    if (!fragments.length) {
+      return;
+    }
+    ensureSocketReady();
+    const socket = ws;
+    for (const fragment of fragments) {
+      socket?.send(
         JSON.stringify({
-          type: "input_text_buffer.commit"
+          type: "input_text_buffer.append",
+          text: fragment
         })
       );
-    }, 100);
-  }
-  function playAudio() {
-    let chunks = audioChunks;
-    if (typeof window === "undefined") return;
-    try {
-      if (!audioContext) {
-        audioContext = new AudioContext();
-      }
-      onAudioStart?.();
-      let totalBytes = 0;
-      const allBytes = [];
-      for (const chunk of chunks) {
-        const binaryString = atob(chunk);
-        const bytes = new Uint8Array(binaryString.length);
-        for (let i = 0; i < binaryString.length; i++) {
-          bytes[i] = binaryString.charCodeAt(i);
-        }
-        allBytes.push(bytes);
-        totalBytes += bytes.length;
-      }
-      const combined = new Uint8Array(totalBytes);
-      let offset = 0;
-      for (const bytes of allBytes) {
-        combined.set(bytes, offset);
-        offset += bytes.length;
-      }
-      const numSamples = combined.length / 2;
-      const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);
-      const channelData = audioBuffer.getChannelData(0);
-      const dataView = new DataView(combined.buffer);
-      for (let i = 0; i < numSamples; i++) {
-        const int16 = dataView.getInt16(i * 2, true);
-        channelData[i] = int16 / 32768;
-      }
-      const source = audioContext.createBufferSource();
-      source.buffer = audioBuffer;
-      source.connect(audioContext.destination);
-      source.onended = () => onAudioEnd?.();
-      source.start(0);
-      audioSource = source;
-    } catch (err) {
-      onError?.(err);
     }
   }
-  function stopAudio() {
-    if (audioSource) {
-      audioSource.stop();
-      audioSource = null;
-    }
-    if (audioContext) {
-      audioContext.close();
-      audioContext = null;
-    }
+  function commitText() {
+    ensureSocketReady();
+    const socket = ws;
+    socket?.send(
+      JSON.stringify({
+        type: "input_text_buffer.commit"
+      })
+    );
   }
-  function close() {
-    if (ws) {
-      ws.close();
-      ws = null;
-    }
-    stopAudio();
+  function startStream() {
+    resetSynthesisState();
+  }
+  async function speak(text) {
+    startStream();
+    appendText(text);
+    commitText();
   }
   return {
     connect,
     speak,
+    startStream,
+    appendText,
+    commitText,
+    play,
+    stop,
     close,
-    play: playAudio,
-    stop: stopAudio
+    isConnected() {
+      return connected && ws?.readyState === WebSocket.OPEN;
+    },
+    hasAudio() {
+      return audioChunks.length > 0;
+    },
+    isResponseDone() {
+      return responseDone;
+    },
+    isPlaying() {
+      return playbackBackend?.isPlaying() ?? false;
+    },
+    isStreamingPlayback() {
+      return playbackBackend?.hasStreamingPlayback ?? false;
+    }
   };
 }
-var tts_client_default = (authConfig) => {
-  return (config) => {
-    return createTTSClient({ ...authConfig, ...config });
+function defaultSnapshot(voice) {
+  return {
+    status: "idle",
+    activeId: null,
+    error: null,
+    requestId: 0,
+    text: null,
+    voice,
+    fallbackMode: "none"
   };
-};
+}
+/**
+ * Reports whether the browser speech-synthesis API is present.
+ *
+ * @returns {boolean} `true` when `window` exists and exposes both
+ *   `speechSynthesis` and `SpeechSynthesisUtterance`.
+ */
+function canUseSystemSpeech() {
+  if (!isBrowserEnvironment()) {
+    return false;
+  }
+  const requiredGlobals = ["speechSynthesis", "SpeechSynthesisUtterance"];
+  return requiredGlobals.every((name) => name in window);
+}
+/**
+ * Speaks `text` with the browser speech-synthesis API, cancelling anything
+ * already queued first.
+ *
+ * @param {string} text - Text to speak.
+ * @param {object} options
+ * @param {Function} [options.onStart] - Called on the utterance's start event.
+ * @param {Function} [options.onEnd] - Called on the utterance's end event.
+ * @param {Function} [options.onError] - Called with an Error on the utterance's error event.
+ * @throws {Error} When speech synthesis is not available.
+ */
+function systemSpeak(text, options) {
+  const supported = canUseSystemSpeech();
+  if (!supported) {
+    throw new Error("SpeechSynthesis is not supported");
+  }
+  const utterance = new SpeechSynthesisUtterance(text);
+  utterance.onstart = () => {
+    options.onStart?.();
+  };
+  utterance.onerror = (event) => {
+    const reason = event.error || "Speech synthesis failed";
+    options.onError?.(new Error(reason));
+  };
+  utterance.onend = () => {
+    options.onEnd?.();
+  };
+  const { speechSynthesis } = window;
+  speechSynthesis.cancel();
+  speechSynthesis.speak(utterance);
+}
+/**
+ * Cancels any browser speech synthesis in progress; does nothing when the
+ * API is unavailable.
+ */
+function stopSystemSpeech() {
+  if (!canUseSystemSpeech()) {
+    return;
+  }
+  window.speechSynthesis.cancel();
+}
+/**
+ * Creates a stateful controller around a TTS client factory.
+ *
+ * The controller owns at most one client at a time, exposes an observable
+ * snapshot (`status`, `activeId`, `error`, `requestId`, `text`, `voice`,
+ * `fallbackMode`) and offers one-shot speaking, incremental streaming and an
+ * optional fallback to the browser's speech synthesis when the client fails.
+ *
+ * @param {Function} createClient - Factory receiving the client config and returning a client.
+ * @param {object} [options]
+ * @param {string} [options.voiceStorageKey] - Key under which the voice is persisted.
+ * @param {object} [options.storage] - Storage with `getItem`/`setItem` (and optionally `removeItem`).
+ * @param {object} [options.runtime] - Runtime object forwarded to the client.
+ * @param {boolean} [options.fallbackToSystemSpeech] - Set to `false` to disable the system-speech fallback.
+ * @returns {object} Controller with `getSnapshot`, `subscribe`, `speak`, `startStream`,
+ *   `appendStreamText`, `commitStream`, `finishStream`, `stop`, `release`, `toggle`,
+ *   `isActive`, `setVoice` and `getVoice`.
+ */
+function createTTSSpeakController(createClient, options = {}) {
+  const listeners = /* @__PURE__ */ new Set();
+  const persistedVoice = options.voiceStorageKey && options.storage ? options.storage.getItem(options.voiceStorageKey) || null : null;
+  let client = null;
+  let snapshot = defaultSnapshot(persistedVoice);
+  let streamActive = false;
+  let streamId = null;
+  const emit = () => {
+    for (const listener of listeners) {
+      listener(snapshot);
+    }
+  };
+  const setSnapshot = (next) => {
+    snapshot = {
+      ...snapshot,
+      ...next
+    };
+    emit();
+  };
+  const persistVoice = (voice) => {
+    if (!options.voiceStorageKey || !options.storage) {
+      return;
+    }
+    if (!voice) {
+      options.storage.removeItem?.(options.voiceStorageKey);
+      return;
+    }
+    options.storage.setItem(options.voiceStorageKey, voice);
+  };
+  const reset = (requestId, preserved) => {
+    snapshot = {
+      status: "idle",
+      activeId: null,
+      error: null,
+      requestId,
+      text: preserved?.text ?? null,
+      voice: preserved?.voice ?? snapshot.voice,
+      fallbackMode: "none"
+    };
+    emit();
+  };
+  // Closure-based so it works regardless of how controller methods are invoked.
+  const isActive = (id) => {
+    const busy = snapshot.status === "connecting" || snapshot.status === "speaking";
+    if (!id) {
+      return busy;
+    }
+    return snapshot.activeId === id && busy;
+  };
+  const stop = (stopOptions) => {
+    const preserveClient = stopOptions?.preserveClient ?? true;
+    // Bumping the request id invalidates callbacks of the request being stopped.
+    const nextRequestId = snapshot.requestId + 1;
+    streamActive = false;
+    streamId = null;
+    client?.stop();
+    stopSystemSpeech();
+    if (!preserveClient || !client || !snapshot.text) {
+      client?.close();
+      client = null;
+      reset(nextRequestId, { voice: snapshot.voice });
+      return;
+    }
+    // Keep the client and the text so the same content can be replayed later.
+    reset(nextRequestId, {
+      text: snapshot.text,
+      voice: snapshot.voice
+    });
+  };
+  const createManagedClient = (input, requestId, content) => {
+    const nextVoice = input.voice ?? snapshot.voice ?? void 0;
+    // Every callback ignores events from a client that is no longer current.
+    const nextClient = createClient({
+      voice: nextVoice,
+      autoPlay: true,
+      audioFormat: input.audioFormat,
+      sampleRate: input.sampleRate,
+      runtime: options.runtime,
+      onReady: () => {
+        if (client !== nextClient) {
+          return;
+        }
+        setSnapshot({
+          status: "connecting",
+          error: null
+        });
+      },
+      onAudioStart: () => {
+        if (client !== nextClient) {
+          return;
+        }
+        setSnapshot({
+          status: "speaking",
+          error: null,
+          fallbackMode: "none"
+        });
+      },
+      onAudioEnd: () => {
+        if (client !== nextClient) {
+          return;
+        }
+        setSnapshot({
+          status: streamActive ? "connecting" : "idle",
+          activeId: streamActive ? streamId : null,
+          error: null,
+          fallbackMode: "none"
+        });
+      },
+      onError: (error) => {
+        if (client !== nextClient) {
+          return;
+        }
+        streamActive = false;
+        streamId = null;
+        setSnapshot({
+          status: "error",
+          error: error.message,
+          fallbackMode: "none"
+        });
+      },
+      onClose: () => {
+        if (client !== nextClient) {
+          return;
+        }
+        client = null;
+      }
+    });
+    client = nextClient;
+    setSnapshot({
+      status: "connecting",
+      activeId: input.id ?? null,
+      error: null,
+      requestId,
+      text: content,
+      voice: nextVoice ?? null,
+      fallbackMode: "none"
+    });
+    return {
+      nextClient,
+      nextVoice
+    };
+  };
+  const ensureStreamClient = async (streamOptions) => {
+    const nextRequestId = snapshot.requestId + 1;
+    const nextVoice = streamOptions.voice ?? snapshot.voice ?? void 0;
+    if (client && snapshot.voice === (nextVoice ?? null)) {
+      // Same voice: reuse the existing client for the new stream.
+      streamActive = true;
+      streamId = streamOptions.id ?? null;
+      setSnapshot({
+        status: client.isPlaying() ? "speaking" : "connecting",
+        activeId: streamId,
+        error: null,
+        requestId: nextRequestId,
+        voice: nextVoice ?? null,
+        fallbackMode: "none"
+      });
+      client.startStream();
+      return;
+    }
+    stop({ preserveClient: false });
+    const { nextClient } = createManagedClient(
+      {
+        id: streamOptions.id,
+        voice: streamOptions.voice,
+        audioFormat: streamOptions.audioFormat,
+        sampleRate: streamOptions.sampleRate
+      },
+      nextRequestId,
+      ""
+    );
+    streamActive = true;
+    streamId = streamOptions.id ?? null;
+    await nextClient.connect();
+    nextClient.startStream();
+  };
+  const speak = async (speakOptions) => {
+    const content = preprocessTTSContent(speakOptions.text);
+    if (!content) {
+      stop({ preserveClient: false });
+      return;
+    }
+    const nextRequestId = snapshot.requestId + 1;
+    const nextVoice = speakOptions.voice ?? snapshot.voice ?? void 0;
+    if (client && snapshot.text === content && snapshot.voice === (nextVoice ?? null) && client.hasAudio()) {
+      // Same text and voice with audio already received: replay it.
+      setSnapshot({
+        status: "speaking",
+        activeId: speakOptions.id ?? null,
+        error: null,
+        requestId: nextRequestId,
+        text: content,
+        voice: nextVoice ?? null,
+        fallbackMode: "none"
+      });
+      client.play();
+      return;
+    }
+    stop({ preserveClient: false });
+    streamActive = false;
+    streamId = null;
+    const { nextClient } = createManagedClient(
+      {
+        id: speakOptions.id,
+        voice: speakOptions.voice,
+        audioFormat: speakOptions.audioFormat,
+        sampleRate: speakOptions.sampleRate
+      },
+      nextRequestId,
+      content
+    );
+    try {
+      await nextClient.connect();
+      await nextClient.speak(content);
+    } catch (error) {
+      if (client !== nextClient) {
+        return;
+      }
+      if (snapshot.requestId !== nextRequestId) {
+        // The request was superseded (e.g. stop() while connecting): drop the
+        // failed client but do not start speaking or touch the newer state.
+        client?.close();
+        client = null;
+        return;
+      }
+      if (options.fallbackToSystemSpeech !== false && canUseSystemSpeech()) {
+        client?.close();
+        client = null;
+        // stop() cancels system speech, which makes the browser fire the
+        // utterance's error/end events afterwards. Ignore them once the
+        // request id has moved on, so they cannot overwrite newer state.
+        const isCurrentRequest = () => snapshot.requestId === nextRequestId;
+        systemSpeak(content, {
+          onStart: () => {
+            if (!isCurrentRequest()) {
+              return;
+            }
+            setSnapshot({
+              status: "speaking",
+              error: null,
+              activeId: speakOptions.id ?? null,
+              requestId: nextRequestId,
+              text: content,
+              voice: nextVoice ?? null,
+              fallbackMode: "system"
+            });
+          },
+          onEnd: () => {
+            if (!isCurrentRequest()) {
+              return;
+            }
+            setSnapshot({
+              status: "idle",
+              activeId: null,
+              error: null,
+              fallbackMode: "none"
+            });
+          },
+          onError: (fallbackError) => {
+            if (!isCurrentRequest()) {
+              return;
+            }
+            setSnapshot({
+              status: "error",
+              error: fallbackError.message,
+              fallbackMode: "none"
+            });
+          }
+        });
+        return;
+      }
+      client?.close();
+      client = null;
+      setSnapshot({
+        status: "error",
+        error: error instanceof Error ? error.message : String(error),
+        fallbackMode: "none"
+      });
+    }
+  };
+  return {
+    getSnapshot() {
+      return snapshot;
+    },
+    subscribe(listener) {
+      listeners.add(listener);
+      return () => {
+        listeners.delete(listener);
+      };
+    },
+    speak,
+    async startStream(streamOptions) {
+      await ensureStreamClient(streamOptions);
+    },
+    async appendStreamText(streamOptions) {
+      const content = preprocessTTSContent(streamOptions.text);
+      if (!content) {
+        return;
+      }
+      if (!streamActive || streamId !== (streamOptions.id ?? null) || !client) {
+        await ensureStreamClient(streamOptions);
+      }
+      client?.appendText(content);
+      setSnapshot({
+        status: snapshot.status === "speaking" ? "speaking" : "connecting",
+        activeId: streamOptions.id ?? null,
+        error: null,
+        text: `${snapshot.text || ""}${content}`,
+        fallbackMode: "none"
+      });
+    },
+    commitStream() {
+      if (!client || !streamActive) {
+        return;
+      }
+      client.commitText();
+      setSnapshot({
+        status: snapshot.status === "speaking" ? "speaking" : "connecting",
+        activeId: streamId,
+        error: null,
+        fallbackMode: "none"
+      });
+    },
+    finishStream() {
+      streamActive = false;
+      streamId = null;
+      setSnapshot({
+        status: client?.isPlaying() ? "speaking" : "idle",
+        activeId: client?.isPlaying() ? snapshot.activeId : null,
+        error: null,
+        fallbackMode: "none"
+      });
+    },
+    stop,
+    release() {
+      stop({ preserveClient: false });
+    },
+    async toggle(toggleOptions) {
+      // Uses the closure instead of `this.isActive`, so a detached
+      // `const { toggle } = controller` keeps working.
+      if (isActive(toggleOptions.id)) {
+        stop();
+        return;
+      }
+      await speak(toggleOptions);
+    },
+    isActive,
+    setVoice(voice) {
+      setSnapshot({ voice });
+      persistVoice(voice);
+    },
+    getVoice() {
+      return snapshot.voice;
+    }
+  };
+}
+/**
+ * Binds shared auth settings and returns a client factory.
+ *
+ * @param {object} authConfig - Settings applied to every client created by the factory.
+ * @returns {Function} `(config) => client`; keys in `config` override `authConfig`.
+ */
+function createTTSClient(authConfig) {
+  return (config) => {
+    const mergedConfig = { ...authConfig, ...config };
+    return createRawTTSClient(mergedConfig);
+  };
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  createTTSClient
+  createTTSClient,
+  createTTSSpeakController,
+  preprocessTTSContent,
+  splitTextIntoFragments
 });
 //# sourceMappingURL=index.cjs.map