@usecrow/ui 0.1.57 → 0.1.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -761,50 +761,26 @@ declare function useCrowAPI({ onIdentified, onReset }?: UseCrowAPIOptions): {
  };

  /**
- * useVoiceInput - Browser speech-to-text via Web Speech API
+ * useVoiceInput - Speech-to-text via Gradium STT WebSocket
  *
- * Returns recording state, transcript, and controls.
- * Hides itself (supported=false) when the browser lacks SpeechRecognition.
+ * Uses MediaRecorder API (works on all browsers including mobile Safari)
+ * and streams audio to backend WebSocket proxy for Gradium STT.
  */
- interface SpeechRecognitionEvent {
- results: SpeechRecognitionResultList;
- resultIndex: number;
- }
- interface SpeechRecognitionErrorEvent {
- error: string;
- message?: string;
- }
- interface SpeechRecognitionInstance extends EventTarget {
- continuous: boolean;
- interimResults: boolean;
- lang: string;
- start(): void;
- stop(): void;
- abort(): void;
- onresult: ((event: SpeechRecognitionEvent) => void) | null;
- onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
- onend: (() => void) | null;
- onspeechend: (() => void) | null;
- }
- declare global {
- interface Window {
- SpeechRecognition?: new () => SpeechRecognitionInstance;
- webkitSpeechRecognition?: new () => SpeechRecognitionInstance;
- }
- }
  interface UseVoiceInputOptions {
- /** Language for recognition (default: browser language or "en-US") */
- lang?: string;
- /** Auto-submit after silence. If set, calls onTranscript with final text after this many ms of silence. */
+ /** Backend URL for WebSocket connection (e.g., "ws://localhost:8000" or "wss://api.example.com") */
+ backendUrl: string;
+ /** Auto-submit after silence. If set, stops recording after this many ms of silence. */
  silenceTimeoutMs?: number;
  }
  interface UseVoiceInputReturn {
- /** Whether the browser supports speech recognition */
+ /** Whether the browser supports audio recording (MediaRecorder API) */
  supported: boolean;
  /** Whether currently recording */
  isRecording: boolean;
- /** Current transcript (includes interim results while recording) */
+ /** Current transcript (accumulated final results) */
  transcript: string;
+ /** Error message if any */
+ error: string | null;
  /** Start recording */
  start: () => void;
  /** Stop recording and finalize transcript */
@@ -814,7 +790,7 @@ interface UseVoiceInputReturn {
  /** Clear the transcript */
  clear: () => void;
  }
- declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
+ declare function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputReturn;

  /**
  * useWidgetStyles Hook
@@ -1144,6 +1120,10 @@ interface PromptInputBoxProps {
  availableModels?: Model[];
  /** When true, adds a pulsing highlight effect to indicate user input is needed */
  highlighted?: boolean;
+ /** Backend URL for voice input WebSocket (required for voice input to work) */
+ backendUrl?: string;
+ /** When this value changes (and is > 0), start voice recording */
+ triggerVoiceRecording?: number;
  }
  declare const PromptInputBox: react__default.ForwardRefExoticComponent<PromptInputBoxProps & react__default.RefAttributes<HTMLDivElement>>;
 
package/dist/index.d.ts CHANGED
@@ -761,50 +761,26 @@ declare function useCrowAPI({ onIdentified, onReset }?: UseCrowAPIOptions): {
  };

  /**
- * useVoiceInput - Browser speech-to-text via Web Speech API
+ * useVoiceInput - Speech-to-text via Gradium STT WebSocket
  *
- * Returns recording state, transcript, and controls.
- * Hides itself (supported=false) when the browser lacks SpeechRecognition.
+ * Uses MediaRecorder API (works on all browsers including mobile Safari)
+ * and streams audio to backend WebSocket proxy for Gradium STT.
  */
- interface SpeechRecognitionEvent {
- results: SpeechRecognitionResultList;
- resultIndex: number;
- }
- interface SpeechRecognitionErrorEvent {
- error: string;
- message?: string;
- }
- interface SpeechRecognitionInstance extends EventTarget {
- continuous: boolean;
- interimResults: boolean;
- lang: string;
- start(): void;
- stop(): void;
- abort(): void;
- onresult: ((event: SpeechRecognitionEvent) => void) | null;
- onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
- onend: (() => void) | null;
- onspeechend: (() => void) | null;
- }
- declare global {
- interface Window {
- SpeechRecognition?: new () => SpeechRecognitionInstance;
- webkitSpeechRecognition?: new () => SpeechRecognitionInstance;
- }
- }
  interface UseVoiceInputOptions {
- /** Language for recognition (default: browser language or "en-US") */
- lang?: string;
- /** Auto-submit after silence. If set, calls onTranscript with final text after this many ms of silence. */
+ /** Backend URL for WebSocket connection (e.g., "ws://localhost:8000" or "wss://api.example.com") */
+ backendUrl: string;
+ /** Auto-submit after silence. If set, stops recording after this many ms of silence. */
  silenceTimeoutMs?: number;
  }
  interface UseVoiceInputReturn {
- /** Whether the browser supports speech recognition */
+ /** Whether the browser supports audio recording (MediaRecorder API) */
  supported: boolean;
  /** Whether currently recording */
  isRecording: boolean;
- /** Current transcript (includes interim results while recording) */
+ /** Current transcript (accumulated final results) */
  transcript: string;
+ /** Error message if any */
+ error: string | null;
  /** Start recording */
  start: () => void;
  /** Stop recording and finalize transcript */
@@ -814,7 +790,7 @@ interface UseVoiceInputReturn {
  /** Clear the transcript */
  clear: () => void;
  }
- declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
+ declare function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputReturn;

  /**
  * useWidgetStyles Hook
@@ -1144,6 +1120,10 @@ interface PromptInputBoxProps {
  availableModels?: Model[];
  /** When true, adds a pulsing highlight effect to indicate user input is needed */
  highlighted?: boolean;
+ /** Backend URL for voice input WebSocket (required for voice input to work) */
+ backendUrl?: string;
+ /** When this value changes (and is > 0), start voice recording */
+ triggerVoiceRecording?: number;
  }
  declare const PromptInputBox: react__default.ForwardRefExoticComponent<PromptInputBoxProps & react__default.RefAttributes<HTMLDivElement>>;
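
Note: the declaration changes above (identical in index.d.cts and index.d.ts) make backendUrl a required option and add an error field to the return value. A minimal consumer sketch, assuming the hook is exported from the package root and using a placeholder backend URL; neither assumption is confirmed by this diff:

import * as React from "react";
import { useVoiceInput } from "@usecrow/ui";

function VoiceNote() {
  // backendUrl is required in 0.1.58; the URL below is a placeholder.
  const { supported, isRecording, transcript, error, toggle, clear } = useVoiceInput({
    backendUrl: "wss://api.example.com",
    silenceTimeoutMs: 1500, // optional: auto-stop after 1.5 s of silence
  });
  if (!supported) return null; // getUserMedia/AudioContext unavailable
  return (
    <div>
      <button onClick={toggle}>{isRecording ? "Stop" : "Record"}</button>
      <p>{transcript}</p>
      {error && <p role="alert">{error}</p>}
      <button onClick={clear}>Clear</button>
    </div>
  );
}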
 
package/dist/index.js CHANGED
@@ -1743,6 +1743,186 @@ function usePreviewCopilotStyles(previewStyles) {
  styles: mergeCopilotStyles(void 0, previewStyles)
  };
  }
+ function useTTSOutput({
+ backendUrl,
+ voiceId = "YTpq7expH9539ERJ"
+ }) {
+ const [isSpeaking, setIsSpeaking] = useState(false);
+ const [error, setError] = useState(null);
+ const wsRef = useRef(null);
+ const audioContextRef = useRef(null);
+ const nextTimeRef = useRef(0);
+ const streamCompleteRef = useRef(false);
+ const completionCheckIntervalRef = useRef(null);
+ const cleanupAudioContext = useCallback(() => {
+ setIsSpeaking(false);
+ if (audioContextRef.current && audioContextRef.current.state !== "closed") {
+ audioContextRef.current.close();
+ audioContextRef.current = null;
+ }
+ if (completionCheckIntervalRef.current) {
+ clearInterval(completionCheckIntervalRef.current);
+ completionCheckIntervalRef.current = null;
+ }
+ }, []);
+ const closeWebSocket = useCallback(() => {
+ if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
+ try {
+ wsRef.current.send(JSON.stringify({ type: "stop" }));
+ wsRef.current.close();
+ } catch (e) {
+ }
+ }
+ wsRef.current = null;
+ }, []);
+ const cleanupTTS = useCallback(() => {
+ setIsSpeaking(false);
+ setError(null);
+ closeWebSocket();
+ cleanupAudioContext();
+ }, [closeWebSocket, cleanupAudioContext]);
+ const waitForAudioComplete = useCallback(() => {
+ if (completionCheckIntervalRef.current) {
+ clearInterval(completionCheckIntervalRef.current);
+ }
+ completionCheckIntervalRef.current = setInterval(() => {
+ if (!audioContextRef.current) {
+ if (completionCheckIntervalRef.current) {
+ clearInterval(completionCheckIntervalRef.current);
+ completionCheckIntervalRef.current = null;
+ }
+ return;
+ }
+ const now = audioContextRef.current.currentTime;
+ if (now >= nextTimeRef.current) {
+ if (completionCheckIntervalRef.current) {
+ clearInterval(completionCheckIntervalRef.current);
+ completionCheckIntervalRef.current = null;
+ }
+ cleanupAudioContext();
+ }
+ }, 100);
+ }, [cleanupAudioContext]);
+ const playAudioChunk = useCallback((base64Audio) => {
+ if (!audioContextRef.current || audioContextRef.current.state === "closed") {
+ console.error("TTS: AudioContext not available");
+ return;
+ }
+ try {
+ const binary = atob(base64Audio);
+ const bytes = new Uint8Array(binary.length);
+ for (let i = 0; i < binary.length; i++) {
+ bytes[i] = binary.charCodeAt(i);
+ }
+ const pcm16 = new Int16Array(bytes.buffer);
+ const float32 = new Float32Array(pcm16.length);
+ for (let i = 0; i < pcm16.length; i++) {
+ float32[i] = pcm16[i] / 32768;
+ }
+ const buffer = audioContextRef.current.createBuffer(1, float32.length, 48e3);
+ buffer.getChannelData(0).set(float32);
+ const source = audioContextRef.current.createBufferSource();
+ source.buffer = buffer;
+ source.connect(audioContextRef.current.destination);
+ const now = audioContextRef.current.currentTime;
+ if (nextTimeRef.current < now) {
+ nextTimeRef.current = now;
+ }
+ source.start(nextTimeRef.current);
+ nextTimeRef.current += buffer.duration;
+ } catch (err) {
+ console.error("TTS: Error playing audio chunk:", err);
+ setError(err instanceof Error ? err.message : "Failed to play audio chunk");
+ }
+ }, []);
+ const speak = useCallback(
+ (text) => {
+ console.log("[TTS Hook] speak called with:", text.substring(0, 50), "backendUrl:", backendUrl);
+ if (!text.trim()) {
+ console.log("[TTS Hook] No text to speak");
+ setError("No text to speak");
+ return;
+ }
+ if (isSpeaking || wsRef.current) {
+ console.log("[TTS Hook] Already playing");
+ setError("Already playing, stop first");
+ return;
+ }
+ setError(null);
+ nextTimeRef.current = 0;
+ streamCompleteRef.current = false;
+ try {
+ audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)({
+ sampleRate: 48e3
+ });
+ const url = backendUrl.startsWith("http") ? backendUrl.replace(/^http/, "ws") : backendUrl;
+ const wsUrl = `${url}/api/tts/stream`;
+ console.log("[TTS Hook] Connecting to:", wsUrl);
+ const ws = new WebSocket(wsUrl);
+ wsRef.current = ws;
+ ws.onopen = () => {
+ ws.send(
+ JSON.stringify({
+ type: "setup",
+ voice_id: voiceId,
+ output_format: "pcm"
+ })
+ );
+ };
+ ws.onmessage = (event) => {
+ const msg = JSON.parse(event.data);
+ if (msg.type === "ready") {
+ ws.send(JSON.stringify({ type: "text", text }));
+ ws.send(JSON.stringify({ type: "end_of_stream" }));
+ } else if (msg.type === "audio") {
+ playAudioChunk(msg.audio);
+ } else if (msg.type === "done") {
+ streamCompleteRef.current = true;
+ closeWebSocket();
+ waitForAudioComplete();
+ } else if (msg.type === "error") {
+ setError(msg.message || "TTS error");
+ cleanupTTS();
+ }
+ };
+ ws.onerror = () => {
+ setError("WebSocket error");
+ cleanupTTS();
+ };
+ ws.onclose = () => {
+ wsRef.current = null;
+ };
+ setIsSpeaking(true);
+ } catch (err) {
+ setError(err instanceof Error ? err.message : "Failed to start TTS");
+ cleanupTTS();
+ }
+ },
+ [
+ isSpeaking,
+ backendUrl,
+ voiceId,
+ playAudioChunk,
+ closeWebSocket,
+ waitForAudioComplete,
+ cleanupTTS
+ ]
+ );
+ const stop = useCallback(() => {
+ cleanupTTS();
+ }, [cleanupTTS]);
+ useEffect(() => {
+ return () => {
+ cleanupTTS();
+ };
+ }, [cleanupTTS]);
+ return {
+ speak,
+ stop,
+ isSpeaking,
+ error
+ };
+ }
  var WidgetStyleContext = createContext(null);
  function WidgetStyleProvider({
  children,
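
Note: the new useTTSOutput hook above streams text to {backendUrl}/api/tts/stream and schedules the returned audio chunks on a 48 kHz AudioContext. For orientation, the WebSocket message shapes implied by the hook can be sketched as TypeScript types; they are inferred from the code in this diff, not from a published schema:

// Client → server messages sent by useTTSOutput (inferred from the hook above).
type TTSClientMessage =
  | { type: "setup"; voice_id: string; output_format: "pcm" }
  | { type: "text"; text: string }
  | { type: "end_of_stream" }
  | { type: "stop" };

// Server → client messages the hook handles; "audio" carries base64-encoded
// 16-bit mono PCM that the hook plays back at 48 kHz.
type TTSServerMessage =
  | { type: "ready" }
  | { type: "audio"; audio: string }
  | { type: "done" }
  | { type: "error"; message?: string };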
@@ -2730,80 +2910,176 @@ var ModelSelector = ({
  ] }, provider)) })
  ] });
  };
- var getSpeechRecognition = () => {
- if (typeof window === "undefined") return null;
- return window.SpeechRecognition || window.webkitSpeechRecognition || null;
+ var isMediaRecorderSupported = () => {
+ if (typeof window === "undefined") return false;
+ return !!(navigator.mediaDevices && typeof navigator.mediaDevices.getUserMedia === "function" && (window.AudioContext || window.webkitAudioContext));
  };
- function useVoiceInput(options = {}) {
- const { lang, silenceTimeoutMs } = options;
- const [supported] = useState(() => getSpeechRecognition() !== null);
+ function useVoiceInput(options) {
+ const { backendUrl, silenceTimeoutMs } = options;
+ const [supported] = useState(() => isMediaRecorderSupported());
  const [isRecording, setIsRecording] = useState(false);
  const [transcript, setTranscript] = useState("");
- const recognitionRef = useRef(null);
+ const [error, setError] = useState(null);
+ const wsRef = useRef(null);
+ const streamRef = useRef(null);
+ const audioContextRef = useRef(null);
+ const processorRef = useRef(null);
  const silenceTimerRef = useRef(null);
- const finalTranscriptRef = useRef("");
+ const transcriptRef = useRef("");
+ const interimRef = useRef("");
+ const isRecordingRef = useRef(false);
  const clearSilenceTimer = useCallback(() => {
  if (silenceTimerRef.current) {
  clearTimeout(silenceTimerRef.current);
  silenceTimerRef.current = null;
  }
  }, []);
- const stop = useCallback(() => {
+ const cleanup = useCallback(() => {
  clearSilenceTimer();
- if (recognitionRef.current) {
- recognitionRef.current.stop();
+ isRecordingRef.current = false;
+ if (interimRef.current) {
+ transcriptRef.current += interimRef.current + " ";
+ setTranscript(transcriptRef.current.trim());
+ interimRef.current = "";
  }
+ if (wsRef.current) {
+ try {
+ if (wsRef.current.readyState === WebSocket.OPEN) {
+ wsRef.current.send(JSON.stringify({ type: "stop" }));
+ }
+ wsRef.current.close();
+ } catch (e) {
+ }
+ wsRef.current = null;
+ }
+ if (processorRef.current) {
+ processorRef.current.disconnect();
+ processorRef.current = null;
+ }
+ if (audioContextRef.current) {
+ audioContextRef.current.close();
+ audioContextRef.current = null;
+ }
+ if (streamRef.current) {
+ streamRef.current.getTracks().forEach((track) => track.stop());
+ streamRef.current = null;
+ }
+ setIsRecording(false);
  }, [clearSilenceTimer]);
+ const stop = useCallback(() => {
+ cleanup();
+ }, [cleanup]);
  const clear = useCallback(() => {
  setTranscript("");
- finalTranscriptRef.current = "";
+ transcriptRef.current = "";
+ setError(null);
  }, []);
- const start = useCallback(() => {
- const SpeechRecognition = getSpeechRecognition();
- if (!SpeechRecognition) return;
- if (recognitionRef.current) {
- recognitionRef.current.abort();
- }
- finalTranscriptRef.current = "";
- setTranscript("");
- const recognition = new SpeechRecognition();
- recognition.continuous = true;
- recognition.interimResults = true;
- recognition.lang = lang || navigator.language || "en-US";
- recognition.onresult = (event) => {
- let interim = "";
- let final = "";
- for (let i = 0; i < event.results.length; i++) {
- const result = event.results[i];
- if (result.isFinal) {
- final += result[0].transcript;
- } else {
- interim += result[0].transcript;
- }
+ const startAudioCapture = useCallback(() => {
+ if (!streamRef.current || !wsRef.current) return;
+ audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24e3 });
+ const source = audioContextRef.current.createMediaStreamSource(
+ streamRef.current
+ );
+ processorRef.current = audioContextRef.current.createScriptProcessor(
+ 4096,
+ 1,
+ 1
+ );
+ processorRef.current.onaudioprocess = (event) => {
+ if (!isRecordingRef.current || !wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
+ return;
  }
- finalTranscriptRef.current = final;
- setTranscript(final + interim);
- if (silenceTimeoutMs) {
- clearSilenceTimer();
- silenceTimerRef.current = setTimeout(() => {
- stop();
- }, silenceTimeoutMs);
+ const inputData = event.inputBuffer.getChannelData(0);
+ const pcm16 = new Int16Array(inputData.length);
+ for (let i = 0; i < inputData.length; i++) {
+ const s = Math.max(-1, Math.min(1, inputData[i]));
+ pcm16[i] = s < 0 ? s * 32768 : s * 32767;
  }
- };
- recognition.onerror = (event) => {
- if (event.error !== "aborted") {
- console.warn("[Crow Voice] Speech recognition error:", event.error);
+ const bytes = new Uint8Array(pcm16.buffer);
+ let binary = "";
+ for (let i = 0; i < bytes.length; i++) {
+ binary += String.fromCharCode(bytes[i]);
  }
- setIsRecording(false);
- };
- recognition.onend = () => {
- setIsRecording(false);
- recognitionRef.current = null;
+ wsRef.current.send(
+ JSON.stringify({ type: "audio", data: btoa(binary) })
+ );
  };
- recognitionRef.current = recognition;
- recognition.start();
- setIsRecording(true);
- }, [lang, silenceTimeoutMs, clearSilenceTimer, stop]);
+ source.connect(processorRef.current);
+ processorRef.current.connect(audioContextRef.current.destination);
+ }, []);
+ const start = useCallback(async () => {
+ if (!supported) {
+ setError("Audio recording not supported in this browser");
+ return;
+ }
+ setError(null);
+ transcriptRef.current = "";
+ setTranscript("");
+ try {
+ streamRef.current = await navigator.mediaDevices.getUserMedia({
+ audio: {
+ echoCancellation: true,
+ noiseSuppression: true,
+ sampleRate: 24e3
+ }
+ });
+ const wsProtocol = backendUrl.startsWith("https") ? "wss" : "ws";
+ const wsHost = backendUrl.replace(/^https?:\/\//, "");
+ const wsUrl = `${wsProtocol}://${wsHost}/api/stt/stream`;
+ wsRef.current = new WebSocket(wsUrl);
+ wsRef.current.onopen = () => {
+ wsRef.current?.send(JSON.stringify({ type: "setup" }));
+ };
+ wsRef.current.onmessage = (event) => {
+ const msg = JSON.parse(event.data);
+ if (msg.type === "ready") {
+ startAudioCapture();
+ isRecordingRef.current = true;
+ setIsRecording(true);
+ } else if (msg.type === "transcript") {
+ if (msg.is_final && msg.text) {
+ transcriptRef.current += msg.text + " ";
+ interimRef.current = "";
+ setTranscript(transcriptRef.current.trim());
+ if (silenceTimeoutMs) {
+ clearSilenceTimer();
+ silenceTimerRef.current = setTimeout(() => {
+ stop();
+ }, silenceTimeoutMs);
+ }
+ } else if (!msg.is_final && msg.text) {
+ interimRef.current = msg.text;
+ setTranscript((transcriptRef.current + msg.text).trim());
+ }
+ } else if (msg.type === "error") {
+ setError(msg.message || "STT error");
+ cleanup();
+ }
+ };
+ wsRef.current.onerror = () => {
+ setError("WebSocket connection error");
+ cleanup();
+ };
+ wsRef.current.onclose = () => {
+ if (isRecordingRef.current) {
+ cleanup();
+ }
+ };
+ } catch (err) {
+ setError(
+ err instanceof Error ? err.message : "Failed to start recording"
+ );
+ cleanup();
+ }
+ }, [
+ supported,
+ backendUrl,
+ startAudioCapture,
+ silenceTimeoutMs,
+ clearSilenceTimer,
+ stop,
+ cleanup
+ ]);
  const toggle = useCallback(() => {
  if (isRecording) {
  stop();
@@ -2813,13 +3089,19 @@ function useVoiceInput(options = {}) {
  }, [isRecording, start, stop]);
  useEffect(() => {
  return () => {
- clearSilenceTimer();
- if (recognitionRef.current) {
- recognitionRef.current.abort();
- }
+ cleanup();
  };
- }, [clearSilenceTimer]);
- return { supported, isRecording, transcript, start, stop, toggle, clear };
+ }, [cleanup]);
+ return {
+ supported,
+ isRecording,
+ transcript,
+ error,
+ start,
+ stop,
+ toggle,
+ clear
+ };
  }
  var Textarea = React3.forwardRef(
  ({ className, ...props }, ref) => /* @__PURE__ */ jsx(
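
Note: the rewritten useVoiceInput above captures microphone audio with getUserMedia and a ScriptProcessorNode, converts each frame to 16-bit PCM at 24 kHz, and streams it base64-encoded to {backendUrl}/api/stt/stream. A sketch of the message shapes, again inferred only from the code in this diff:

// Client → server: one "setup" message, then base64-encoded 16-bit mono PCM
// frames, then "stop" when recording ends.
type STTClientMessage =
  | { type: "setup" }
  | { type: "audio"; data: string }
  | { type: "stop" };

// Server → client: interim transcripts (is_final === false) replace the current
// interim text; final ones are appended to the accumulated transcript.
type STTServerMessage =
  | { type: "ready" }
  | { type: "transcript"; text: string; is_final: boolean }
  | { type: "error"; message?: string };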
@@ -3013,11 +3295,23 @@ var PromptInputBox = React3.forwardRef(
  selectedModel = "gpt-4o",
  onModelChange,
  availableModels = [],
- highlighted = false
+ highlighted = false,
+ backendUrl = "",
+ triggerVoiceRecording = 0
  }, ref) => {
  const [input, setInput] = React3.useState("");
  const promptBoxRef = React3.useRef(null);
- const voice = useVoiceInput();
+ const voice = useVoiceInput({ backendUrl, silenceTimeoutMs: 1500 });
+ const lastTriggerRef = React3.useRef(0);
+ const voiceRef = React3.useRef(voice);
+ voiceRef.current = voice;
+ React3.useEffect(() => {
+ if (triggerVoiceRecording > 0 && triggerVoiceRecording !== lastTriggerRef.current) {
+ console.log("[Voice] Auto-starting recording from trigger");
+ voiceRef.current.start();
+ }
+ lastTriggerRef.current = triggerVoiceRecording;
+ }, [triggerVoiceRecording]);
  React3.useEffect(() => {
  if (voice.isRecording && voice.transcript) {
  setInput(voice.transcript);
@@ -3026,11 +3320,16 @@ var PromptInputBox = React3.forwardRef(
  const wasRecordingRef = React3.useRef(false);
  React3.useEffect(() => {
  if (wasRecordingRef.current && !voice.isRecording && voice.transcript) {
- setInput(voice.transcript);
+ const messageToSend = voice.transcript.trim();
+ if (messageToSend) {
+ console.log("[Voice] Auto-sending:", messageToSend);
+ onSend(messageToSend);
+ setInput("");
+ }
  voice.clear();
  }
  wasRecordingRef.current = voice.isRecording;
- }, [voice.isRecording, voice.transcript, voice.clear]);
+ }, [voice.isRecording, voice.transcript, voice.clear, onSend]);
  const handleSubmit = () => {
  if (input.trim()) {
  if (voice.isRecording) {
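
Note: PromptInputBox now forwards backendUrl to useVoiceInput, starts recording when triggerVoiceRecording changes to a new value greater than zero, and auto-sends the transcript via onSend once recording stops. A hedged usage sketch; the parent component and handler are hypothetical, and it assumes PromptInputBox (with an onSend prop) is exported from the package root:

import * as React from "react";
import { PromptInputBox } from "@usecrow/ui";

function ChatFooter({ backendUrl }: { backendUrl: string }) {
  // Bumping the counter re-triggers voice recording (values must change and be > 0).
  const [voiceTrigger, setVoiceTrigger] = React.useState(0);
  return (
    <>
      <button onClick={() => setVoiceTrigger((n) => n + 1)}>Speak</button>
      <PromptInputBox
        onSend={(text) => console.log("send:", text)}
        backendUrl={backendUrl}
        triggerVoiceRecording={voiceTrigger}
      />
    </>
  );
}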
@@ -3641,6 +3940,25 @@ function CrowWidget({
  setShouldRestoreHistory(true);
  }
  });
+ const tts = useTTSOutput({ backendUrl: apiUrl });
+ const ttsRef = useRef(tts);
+ ttsRef.current = tts;
+ const wasLoadingRef = useRef(false);
+ useEffect(() => {
+ console.log("[Crow TTS] isLoading changed:", chat.isLoading, "wasLoading:", wasLoadingRef.current);
+ if (wasLoadingRef.current && !chat.isLoading) {
+ const lastMessage = [...chat.messages].reverse().find((m) => m.isBot);
+ console.log("[Crow TTS] Last bot message:", lastMessage?.content?.substring(0, 50));
+ if (lastMessage?.content) {
+ const textToSpeak = lastMessage.content.replace(/\*\*/g, "").replace(/\*/g, "").replace(/`[^`]+`/g, "").replace(/\[([^\]]+)\]\([^)]+\)/g, "$1").trim();
+ if (textToSpeak) {
+ console.log("[Crow TTS] Speaking:", textToSpeak.substring(0, 50));
+ ttsRef.current.speak(textToSpeak);
+ }
+ }
+ }
+ wasLoadingRef.current = chat.isLoading;
+ }, [chat.isLoading, chat.messages]);
  useEffect(() => {
  if (initialSuggestions.length > 0 && chat.suggestedActions.length === 0) {
  chat.setSuggestedActions(initialSuggestions);
@@ -4035,7 +4353,8 @@ function CrowWidget({
  isLoading: chat.isLoading,
  showStopButton: isBrowserUseActive || !!askUserResolver || !!pendingConfirmation,
  highlighted: !!askUserResolver,
- className: "crow-backdrop-blur-md"
+ className: "crow-backdrop-blur-md",
+ backendUrl: apiUrl
  }
  )
  ] })
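
Note: CrowWidget now speaks the last bot message once loading finishes, after stripping basic markdown. For reference, a worked example of that replace chain on a made-up message (the sample string is illustrative only):

const content = "Check the **docs**: see `npm install` and [this guide](https://example.com).";
const textToSpeak = content
  .replace(/\*\*/g, "")
  .replace(/\*/g, "")
  .replace(/`[^`]+`/g, "")
  .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
  .trim();
// textToSpeak === "Check the docs: see  and this guide."
// (bold/italic markers removed, inline code dropped entirely, link replaced by its label)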